647 files changed, 33114 insertions, 14101 deletions
diff --git a/Documentation/devicetree/bindings/net/meson-dwmac.txt b/Documentation/devicetree/bindings/net/meson-dwmac.txt index 61cada22ae6c..1321bb194ed9 100644 --- a/Documentation/devicetree/bindings/net/meson-dwmac.txt +++ b/Documentation/devicetree/bindings/net/meson-dwmac.txt @@ -11,6 +11,7 @@ Required properties on all platforms: - "amlogic,meson8b-dwmac" - "amlogic,meson8m2-dwmac" - "amlogic,meson-gxbb-dwmac" + - "amlogic,meson-axg-dwmac" Additionally "snps,dwmac" and any applicable more detailed version number described in net/stmmac.txt should be used. diff --git a/Documentation/devicetree/bindings/net/microchip,lan78xx.txt b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt new file mode 100644 index 000000000000..76786a0f6d3d --- /dev/null +++ b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt @@ -0,0 +1,54 @@ +Microchip LAN78xx Gigabit Ethernet controller + +The LAN78XX devices are usually configured by programming their OTP or with +an external EEPROM, but some platforms (e.g. Raspberry Pi 3 B+) have neither. +The Device Tree properties, if present, override the OTP and EEPROM. + +Required properties: +- compatible: Should be one of "usb424,7800", "usb424,7801" or "usb424,7850". + +Optional properties: +- local-mac-address: see ethernet.txt +- mac-address: see ethernet.txt + +Optional properties of the embedded PHY: +- microchip,led-modes: a 0..4 element vector, with each element configuring + the operating mode of an LED. Omitted LEDs are turned off. Allowed values + are defined in "include/dt-bindings/net/microchip-lan78xx.h". + +Example: + +/* Based on the configuration for a Raspberry Pi 3 B+ */ +&usb { + usb-port@1 { + compatible = "usb424,2514"; + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + usb-port@1 { + compatible = "usb424,2514"; + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + ethernet: ethernet@1 { + compatible = "usb424,7800"; + reg = <1>; + local-mac-address = [ 00 11 22 33 44 55 ]; + + mdio { + #address-cells = <0x1>; + #size-cells = <0x0>; + eth_phy: ethernet-phy@1 { + reg = <1>; + microchip,led-modes = < + LAN78XX_LINK_1000_ACTIVITY + LAN78XX_LINK_10_100_ACTIVITY + >; + }; + }; + }; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt index 96398cc2982f..fc8f01718690 100644 --- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt +++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt @@ -13,13 +13,25 @@ Required properties: - reg: Address where registers are mapped and size of region. - interrupts: Should contain the MAC interrupt. - phy-mode: See ethernet.txt in the same directory. Allow to choose - "rgmii", "rmii", or "mii" according to the PHY. + "rgmii", "rmii", "mii", or "internal" according to the PHY. + The acceptable mode is SoC-dependent. - phy-handle: Should point to the external phy device. See ethernet.txt file in the same directory. - clocks: A phandle to the clock for the MAC. + For Pro4 SoC, that is "socionext,uniphier-pro4-ave4", + another MAC clock, GIO bus clock and PHY clock are also required. + - clock-names: Should contain + - "ether", "ether-gb", "gio", "ether-phy" for Pro4 SoC + - "ether" for others + - resets: A phandle to the reset control for the MAC. For Pro4 SoC, + GIO bus reset is also required. 
+ - reset-names: Should contain + - "ether", "gio" for Pro4 SoC + - "ether" for others + - socionext,syscon-phy-mode: A phandle to syscon with one argument + that configures phy mode. The argument is the ID of MAC instance. Optional properties: - - resets: A phandle to the reset control for the MAC. - local-mac-address: See ethernet.txt in the same directory. Required subnode: @@ -34,8 +46,11 @@ Example: interrupts = <0 66 4>; phy-mode = "rgmii"; phy-handle = <&ethphy>; + clock-names = "ether"; clocks = <&sys_clk 6>; + reset-names = "ether"; resets = <&sys_rst 6>; + socionext,syscon-phy-mode = <&soc_glue 0>; local-mac-address = [00 00 00 00 00 00]; mdio { diff --git a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt index 77cd42cc5f54..b025770eeb92 100644 --- a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt +++ b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt @@ -17,7 +17,8 @@ pool management. Required properties: -- compatible : Must be "ti,keystone-navigator-qmss"; +- compatible : Must be "ti,keystone-navigator-qmss". + : Must be "ti,66ak2g-navss-qm" for QMSS on K2G SoC. - clocks : phandle to the reference clock for this device. - queue-range : <start number> total range of queue numbers for the device. - linkram0 : <address size> for internal link ram, where size is the total @@ -39,6 +40,12 @@ Required properties: - Descriptor memory setup region. - Queue Management/Queue Proxy region for queue Push. - Queue Management/Queue Proxy region for queue Pop. + +For QMSS on K2G SoC, following QM reg indexes are used in that order + - Queue Peek region. + - Queue configuration region. + - Queue Management/Queue Proxy region for queue Push/Pop. + - queue-pools : child node classifying the queue ranges into pools. Queue ranges are grouped into 3 type of pools: - qpend : pool of qpend(interruptible) queues diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index 5efae00f6c7f..d2963123eb1c 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt @@ -5,6 +5,7 @@ Written 1996 by Gero Kuhlmann <gero@gkminix.han.de> Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz> Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org> Updated 2006 by Horms <horms@verge.net.au> +Updated 2018 by Chris Novakovic <chris@chrisn.me.uk> @@ -79,7 +80,7 @@ nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>] ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: - <dns0-ip>:<dns1-ip> + <dns0-ip>:<dns1-ip>:<ntp0-ip> This parameter tells the kernel how to configure IP addresses of devices and also how to set up the IP routing table. It was originally called @@ -110,6 +111,9 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: will not be triggered if it is missing and NFS root is not in operation. + Value is exported to /proc/net/pnp with the prefix "bootserver " + (see below). + Default: Determined using autoconfiguration. The address of the autoconfiguration server is used. @@ -123,10 +127,13 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: Default: Determined using autoconfiguration. - <hostname> Name of the client. May be supplied by autoconfiguration, - but its absence will not trigger autoconfiguration. 
- If specified and DHCP is used, the user provided hostname will - be carried in the DHCP request to hopefully update DNS record. + <hostname> Name of the client. If a '.' character is present, anything + before the first '.' is used as the client's hostname, and anything + after it is used as its NIS domain name. May be supplied by + autoconfiguration, but its absence will not trigger autoconfiguration. + If specified and DHCP is used, the user-provided hostname (and NIS + domain name, if present) will be carried in the DHCP request; this + may cause a DNS record to be created or updated for the client. Default: Client IP address is used in ASCII notation. @@ -162,12 +169,55 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: Default: any - <dns0-ip> IP address of first nameserver. - Value gets exported by /proc/net/pnp which is often linked - on embedded systems by /etc/resolv.conf. + <dns0-ip> IP address of primary nameserver. + Value is exported to /proc/net/pnp with the prefix "nameserver " + (see below). + + Default: None if not using autoconfiguration; determined + automatically if using autoconfiguration. + + <dns1-ip> IP address of secondary nameserver. + See <dns0-ip>. + + <ntp0-ip> IP address of a Network Time Protocol (NTP) server. + Value is exported to /proc/net/ipconfig/ntp_servers, but is + otherwise unused (see below). + + Default: None if not using autoconfiguration; determined + automatically if using autoconfiguration. + + After configuration (whether manual or automatic) is complete, two files + are created in the following format; lines are omitted if their respective + value is empty following configuration: + + - /proc/net/pnp: + + #PROTO: <DHCP|BOOTP|RARP|MANUAL> (depending on configuration method) + domain <dns-domain> (if autoconfigured, the DNS domain) + nameserver <dns0-ip> (primary name server IP) + nameserver <dns1-ip> (secondary name server IP) + nameserver <dns2-ip> (tertiary name server IP) + bootserver <server-ip> (NFS server IP) + + - /proc/net/ipconfig/ntp_servers: + + <ntp0-ip> (NTP server IP) + <ntp1-ip> (NTP server IP) + <ntp2-ip> (NTP server IP) + + <dns-domain> and <dns2-ip> (in /proc/net/pnp) and <ntp1-ip> and <ntp2-ip> + (in /proc/net/ipconfig/ntp_servers) are requested during autoconfiguration; + they cannot be specified as part of the "ip=" kernel command line parameter. + + Because the "domain" and "nameserver" options are recognised by DNS + resolvers, /etc/resolv.conf is often linked to /proc/net/pnp on systems + that use an NFS root filesystem. - <dns1-ip> IP address of second nameserver. - Same as above. + Note that the kernel will not synchronise the system time with any NTP + servers it discovers; this is the responsibility of a user space process + (e.g. an initrd/initramfs script that passes the IP addresses listed in + /proc/net/ipconfig/ntp_servers to an NTP client before mounting the real + root filesystem if it is on NFS). nfsrootdebug diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 35ffaa281b26..59afc9a10b4f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1428,6 +1428,19 @@ ip6frag_low_thresh - INTEGER ip6frag_time - INTEGER Time in seconds to keep an IPv6 fragment in memory. +IPv6 Segment Routing: + +seg6_flowlabel - INTEGER + Controls the behaviour of computing the flowlabel of outer + IPv6 header in case of SR T.encaps + + -1 set flowlabel to zero. 
+ 0 copy flowlabel from Inner packet in case of Inner IPv6 + (Set flowlabel to 0 in case IPv4/L2) + 1 Compute the flowlabel using seg6_make_flowlabel() + + Default is 0. + conf/default/*: Change the interface-specific default settings. diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt index c77f9d57eb91..c4a54c162547 100644 --- a/Documentation/networking/netdev-features.txt +++ b/Documentation/networking/netdev-features.txt @@ -113,6 +113,13 @@ whatever headers there might be. NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6). + * Transmit UDP segmentation offload + +NETIF_F_GSO_UDP_GSO_L4 accepts a single UDP header with a payload that exceeds +gso_size. On segmentation, it segments the payload on gso_size boundaries and +replicates the network and UDP headers (fixing up the last one if less than +gso_size). + * Transmit DMA from high memory On platforms where this is relevant, NETIF_F_HIGHDMA signals that diff --git a/MAINTAINERS b/MAINTAINERS index b1ccabd0dbc3..ebe0b9ed7805 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5328,7 +5328,6 @@ F: include/linux/*mdio*.h F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h -F: include/linux/platform_data/mdio-gpio.h F: include/linux/platform_data/mdio-bcm-unimac.h F: include/trace/events/mdio.h F: include/uapi/linux/mdio.h @@ -9030,26 +9029,17 @@ W: http://www.mellanox.com Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlx5/core/en_* -MELLANOX ETHERNET INNOVA DRIVER -M: Ilan Tayari <ilant@mellanox.com> -R: Boris Pismenny <borisp@mellanox.com> +MELLANOX ETHERNET INNOVA DRIVERS +M: Boris Pismenny <borisp@mellanox.com> L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com Q: http://patchwork.ozlabs.org/project/netdev/list/ +F: drivers/net/ethernet/mellanox/mlx5/core/en_accel/* +F: drivers/net/ethernet/mellanox/mlx5/core/accel/* F: drivers/net/ethernet/mellanox/mlx5/core/fpga/* F: include/linux/mlx5/mlx5_ifc_fpga.h -MELLANOX ETHERNET INNOVA IPSEC DRIVER -M: Ilan Tayari <ilant@mellanox.com> -R: Boris Pismenny <borisp@mellanox.com> -L: netdev@vger.kernel.org -S: Supported -W: http://www.mellanox.com -Q: http://patchwork.ozlabs.org/project/netdev/list/ -F: drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/* -F: drivers/net/ethernet/mellanox/mlx5/core/ipsec* - MELLANOX ETHERNET SWITCH DRIVERS M: Jiri Pirko <jiri@mellanox.com> M: Ido Schimmel <idosch@mellanox.com> @@ -9842,7 +9832,7 @@ F: net/netfilter/xt_CONNSECMARK.c F: net/netfilter/xt_SECMARK.c NETWORKING [TLS] -M: Ilya Lesokhin <ilyal@mellanox.com> +M: Boris Pismenny <borisp@mellanox.com> M: Aviad Yehezkel <aviadye@mellanox.com> M: Dave Watson <davejwatson@fb.com> L: netdev@vger.kernel.org @@ -14626,7 +14616,9 @@ M: Woojung Huh <woojung.huh@microchip.com> M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> L: netdev@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/net/microchip,lan78xx.txt F: drivers/net/usb/lan78xx.* +F: include/dt-bindings/net/microchip-lan78xx.h USB MASS STORAGE DRIVER M: Alan Stern <stern@rowland.harvard.edu> diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index a782ce87715c..ed5e42461094 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -262,6 +262,8 @@ void proc_coredump_connector(struct task_struct *task) ev->what = PROC_EVENT_COREDUMP; 
ev->event_data.coredump.process_pid = task->pid; ev->event_data.coredump.process_tgid = task->tgid; + ev->event_data.coredump.parent_pid = task->real_parent->pid; + ev->event_data.coredump.parent_tgid = task->real_parent->tgid; memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ @@ -288,6 +290,8 @@ void proc_exit_connector(struct task_struct *task) ev->event_data.exit.process_tgid = task->tgid; ev->event_data.exit.exit_code = task->exit_code; ev->event_data.exit.exit_signal = task->exit_signal; + ev->event_data.exit.parent_pid = task->real_parent->pid; + ev->event_data.exit.parent_tgid = task->real_parent->tgid; memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index 7afbb28d6a0f..1bc5ffb338c8 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c @@ -270,7 +270,7 @@ EXPORT_SYMBOL_GPL(dca_remove_requester); * @dev - the device that wants dca service * @cpu - the cpuid as returned by get_cpu() */ -u8 dca_common_get_tag(struct device *dev, int cpu) +static u8 dca_common_get_tag(struct device *dev, int cpu) { struct dca_provider *dca; u8 tag; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c index 4be3aef40bd2..267da8215e08 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c @@ -443,17 +443,16 @@ static u8 opa_vnic_get_rc(struct __opa_veswport_info *info, } /* opa_vnic_calc_entropy - calculate the packet entropy */ -u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb) +u8 opa_vnic_calc_entropy(struct sk_buff *skb) { - u16 hash16; - - /* - * Get flow based 16-bit hash and then XOR the upper and lower bytes - * to get the entropy. - * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash. 
- */ - hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15)); - return (u8)((hash16 >> 8) ^ (hash16 & 0xff)); + u32 hash = skb_get_hash(skb); + + /* store XOR of all bytes in lower 8 bits */ + hash ^= hash >> 8; + hash ^= hash >> 16; + + /* return lower 8 bits as entropy */ + return (u8)(hash & 0xFF); } /* opa_vnic_get_def_port - get default port based on entropy */ @@ -490,7 +489,7 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) hdr = skb_push(skb, OPA_VNIC_HDR_LEN); - entropy = opa_vnic_calc_entropy(adapter, skb); + entropy = opa_vnic_calc_entropy(skb); def_port = opa_vnic_get_def_port(adapter, entropy); len = opa_vnic_wire_length(skb); dlid = opa_vnic_get_dlid(adapter, skb, def_port); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h index afd95f432262..43ac61ffef4a 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ -299,7 +299,7 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev, void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter); void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb); u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb); -u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb); +u8 opa_vnic_calc_entropy(struct sk_buff *skb); void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter); void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter); void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter, diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index ce57e0f10289..0c8aec62a425 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -104,7 +104,7 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb, /* pass entropy and vl as metadata in skb */ mdata = skb_push(skb, sizeof(*mdata)); - mdata->entropy = opa_vnic_calc_entropy(adapter, skb); + mdata->entropy = opa_vnic_calc_entropy(skb); mdata->vl = opa_vnic_get_vl(adapter, skb); rc = adapter->rn_ops->ndo_select_queue(netdev, skb, accel_priv, fallback); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 891846655000..a029b27fd002 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -198,6 +198,7 @@ config VXLAN config GENEVE tristate "Generic Network Virtualization Encapsulation" depends on INET && NET_UDP_TUNNEL + depends on IPV6 || !IPV6 select NET_IP_TUNNEL select GRO_CELLS ---help--- diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 78616787f2a3..9f561fe505cb 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -806,16 +806,39 @@ static unsigned int b53_get_mib_size(struct b53_device *dev) return B53_MIBS_SIZE; } -void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +static struct phy_device *b53_get_phy_device(struct dsa_switch *ds, int port) +{ + /* These ports typically do not have built-in PHYs */ + switch (port) { + case B53_CPU_PORT_25: + case 7: + case B53_CPU_PORT: + return NULL; + } + + return mdiobus_get_phy(ds->slave_mii_bus, port); +} + +void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset, + uint8_t *data) { struct b53_device *dev = ds->priv; const struct b53_mib_desc *mibs = b53_get_mib(dev); unsigned int mib_size = 
b53_get_mib_size(dev); + struct phy_device *phydev; unsigned int i; - for (i = 0; i < mib_size; i++) - strlcpy(data + i * ETH_GSTRING_LEN, - mibs[i].name, ETH_GSTRING_LEN); + if (stringset == ETH_SS_STATS) { + for (i = 0; i < mib_size; i++) + strlcpy(data + i * ETH_GSTRING_LEN, + mibs[i].name, ETH_GSTRING_LEN); + } else if (stringset == ETH_SS_PHY_STATS) { + phydev = b53_get_phy_device(ds, port); + if (!phydev) + return; + + phy_ethtool_get_strings(phydev, data); + } } EXPORT_SYMBOL(b53_get_strings); @@ -852,11 +875,34 @@ void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) } EXPORT_SYMBOL(b53_get_ethtool_stats); -int b53_get_sset_count(struct dsa_switch *ds, int port) +void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data) +{ + struct phy_device *phydev; + + phydev = b53_get_phy_device(ds, port); + if (!phydev) + return; + + phy_ethtool_get_stats(phydev, NULL, data); +} +EXPORT_SYMBOL(b53_get_ethtool_phy_stats); + +int b53_get_sset_count(struct dsa_switch *ds, int port, int sset) { struct b53_device *dev = ds->priv; + struct phy_device *phydev; + + if (sset == ETH_SS_STATS) { + return b53_get_mib_size(dev); + } else if (sset == ETH_SS_PHY_STATS) { + phydev = b53_get_phy_device(ds, port); + if (!phydev) + return 0; + + return phy_ethtool_get_sset_count(phydev); + } - return b53_get_mib_size(dev); + return 0; } EXPORT_SYMBOL(b53_get_sset_count); @@ -1477,7 +1523,7 @@ void b53_br_fast_age(struct dsa_switch *ds, int port) } EXPORT_SYMBOL(b53_br_fast_age); -static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port) +static bool b53_possible_cpu_port(struct dsa_switch *ds, int port) { /* Broadcom switches will accept enabling Broadcom tags on the * following ports: 5, 7 and 8, any other port is not supported @@ -1489,10 +1535,19 @@ static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port) return true; } - dev_warn(ds->dev, "Port %d is not Broadcom tag capable\n", port); return false; } +static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port) +{ + bool ret = b53_possible_cpu_port(ds, port); + + if (!ret) + dev_warn(ds->dev, "Port %d is not Broadcom tag capable\n", + port); + return ret; +} + enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port) { struct b53_device *dev = ds->priv; @@ -1650,6 +1705,7 @@ static const struct dsa_switch_ops b53_switch_ops = { .get_strings = b53_get_strings, .get_ethtool_stats = b53_get_ethtool_stats, .get_sset_count = b53_get_sset_count, + .get_ethtool_phy_stats = b53_get_ethtool_phy_stats, .phy_read = b53_phy_read16, .phy_write = b53_phy_write16, .adjust_link = b53_adjust_link, @@ -1954,6 +2010,15 @@ static int b53_switch_init(struct b53_device *dev) dev->num_ports = dev->cpu_port + 1; dev->enabled_ports |= BIT(dev->cpu_port); + /* Include non standard CPU port built-in PHYs to be probed */ + if (is539x(dev) || is531x5(dev)) { + for (i = 0; i < dev->num_ports; i++) { + if (!(dev->ds->phys_mii_mask & BIT(i)) && + !b53_possible_cpu_port(dev->ds, i)) + dev->ds->phys_mii_mask |= BIT(i); + } + } + dev->ports = devm_kzalloc(dev->dev, sizeof(struct b53_port) * dev->num_ports, GFP_KERNEL); diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 1187ebd79287..cc284a514de9 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -286,9 +286,11 @@ static inline int b53_switch_get_reset_gpio(struct b53_device *dev) /* Exported functions towards other drivers */ void b53_imp_vlan_setup(struct dsa_switch *ds, int 
cpu_port); int b53_configure_vlan(struct dsa_switch *ds); -void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data); +void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset, + uint8_t *data); void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); -int b53_get_sset_count(struct dsa_switch *ds, int port); +int b53_get_sset_count(struct dsa_switch *ds, int port, int sset); +void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge); void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge); void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0378eded31f2..97236cfcbae4 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -859,6 +859,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .get_strings = b53_get_strings, .get_ethtool_stats = b53_get_ethtool_stats, .get_sset_count = b53_get_sset_count, + .get_ethtool_phy_stats = b53_get_ethtool_phy_stats, .get_phy_flags = bcm_sf2_sw_get_phy_flags, .adjust_link = bcm_sf2_sw_adjust_link, .fixed_link_update = bcm_sf2_sw_fixed_link_update, diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index f77be9f85cb3..58f14af04639 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -86,16 +86,23 @@ static int dsa_loop_setup(struct dsa_switch *ds) return 0; } -static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port) +static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port, int sset) { + if (sset != ETH_SS_STATS && sset != ETH_SS_PHY_STATS) + return 0; + return __DSA_LOOP_CNT_MAX; } -static void dsa_loop_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +static void dsa_loop_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data) { struct dsa_loop_priv *ps = ds->priv; unsigned int i; + if (stringset != ETH_SS_STATS && stringset != ETH_SS_PHY_STATS) + return; + for (i = 0; i < __DSA_LOOP_CNT_MAX; i++) memcpy(data + i * ETH_GSTRING_LEN, ps->ports[port].mib[i].name, ETH_GSTRING_LEN); @@ -256,6 +263,7 @@ static const struct dsa_switch_ops dsa_loop_driver = { .get_strings = dsa_loop_get_strings, .get_ethtool_stats = dsa_loop_get_ethtool_stats, .get_sset_count = dsa_loop_get_sset_count, + .get_ethtool_phy_stats = dsa_loop_get_ethtool_stats, .phy_read = dsa_loop_phy_read, .phy_write = dsa_loop_phy_write, .port_bridge_join = dsa_loop_port_bridge_join, diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index fefa454f3e56..b4f6e1a67dd9 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -977,10 +977,14 @@ static const struct lan9303_mib_desc lan9303_mib[] = { { .offset = LAN9303_MAC_TX_LATECOL_0, .name = "TxLateCol", }, }; -static void lan9303_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +static void lan9303_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data) { unsigned int u; + if (stringset != ETH_SS_STATS) + return; + for (u = 0; u < ARRAY_SIZE(lan9303_mib); u++) { strncpy(data + u * ETH_GSTRING_LEN, lan9303_mib[u].name, ETH_GSTRING_LEN); @@ -1007,8 +1011,11 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port, } } -static int lan9303_get_sset_count(struct dsa_switch *ds, int port) +static int lan9303_get_sset_count(struct dsa_switch *ds, int port, int sset) { + if (sset != ETH_SS_STATS) 
+ return 0; + return ARRAY_SIZE(lan9303_mib); } diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index bcb3e6c734f2..7210c49b7922 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -439,15 +439,22 @@ static void ksz_disable_port(struct dsa_switch *ds, int port, ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true); } -static int ksz_sset_count(struct dsa_switch *ds, int port) +static int ksz_sset_count(struct dsa_switch *ds, int port, int sset) { + if (sset != ETH_SS_STATS) + return 0; + return TOTAL_SWITCH_COUNTER_NUM; } -static void ksz_get_strings(struct dsa_switch *ds, int port, uint8_t *buf) +static void ksz_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *buf) { int i; + if (stringset != ETH_SS_STATS) + return; + for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) { memcpy(buf + i * ETH_GSTRING_LEN, mib_names[i].string, ETH_GSTRING_LEN); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 80a4dbc3a499..62e486652e62 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -573,10 +573,14 @@ static int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum, } static void -mt7530_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +mt7530_get_strings(struct dsa_switch *ds, int port, u32 stringset, + uint8_t *data) { int i; + if (stringset != ETH_SS_STATS) + return; + for (i = 0; i < ARRAY_SIZE(mt7530_mib); i++) strncpy(data + i * ETH_GSTRING_LEN, mt7530_mib[i].name, ETH_GSTRING_LEN); @@ -604,8 +608,11 @@ mt7530_get_ethtool_stats(struct dsa_switch *ds, int port, } static int -mt7530_get_sset_count(struct dsa_switch *ds, int port) +mt7530_get_sset_count(struct dsa_switch *ds, int port, int sset) { + if (sset != ETH_SS_STATS) + return 0; + return ARRAY_SIZE(mt7530_mib); } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 3d2091099f7f..9d62e4acc01b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -665,13 +665,13 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, case STATS_TYPE_PORT: err = mv88e6xxx_port_read(chip, port, s->reg, &reg); if (err) - return UINT64_MAX; + return U64_MAX; low = reg; if (s->size == 4) { err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg); if (err) - return UINT64_MAX; + return U64_MAX; high = reg; } break; @@ -685,7 +685,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, mv88e6xxx_g1_stats_read(chip, reg + 1, &high); break; default: - return UINT64_MAX; + return U64_MAX; } value = (((u64)high) << 16) | low; return value; @@ -742,11 +742,14 @@ static void mv88e6xxx_atu_vtu_get_strings(uint8_t *data) } static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, - uint8_t *data) + u32 stringset, uint8_t *data) { struct mv88e6xxx_chip *chip = ds->priv; int count = 0; + if (stringset != ETH_SS_STATS) + return; + mutex_lock(&chip->reg_lock); if (chip->info->ops->stats_get_strings) @@ -789,12 +792,15 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip) STATS_TYPE_BANK1); } -static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port) +static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port, int sset) { struct mv88e6xxx_chip *chip = ds->priv; int serdes_count = 0; int count = 0; + if (sset != ETH_SS_STATS) + return 0; + mutex_lock(&chip->reg_lock); if (chip->info->ops->stats_get_sset_count) count = 
chip->info->ops->stats_get_sset_count(chip); @@ -1020,6 +1026,38 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, dev_err(ds->dev, "p%d: failed to update state\n", port); } +static int mv88e6xxx_devmap_setup(struct mv88e6xxx_chip *chip) +{ + int target, port; + int err; + + if (!chip->info->global2_addr) + return 0; + + /* Initialize the routing port to the 32 possible target devices */ + for (target = 0; target < 32; target++) { + port = 0x1f; + if (target < DSA_MAX_SWITCHES) + if (chip->ds->rtable[target] != DSA_RTABLE_NONE) + port = chip->ds->rtable[target]; + + err = mv88e6xxx_g2_device_mapping_write(chip, target, port); + if (err) + return err; + } + + return 0; +} + +static int mv88e6xxx_trunk_setup(struct mv88e6xxx_chip *chip) +{ + /* Clear all trunk masks and mapping */ + if (chip->info->global2_addr) + return mv88e6xxx_g2_trunk_clear(chip); + + return 0; +} + static int mv88e6xxx_pot_setup(struct mv88e6xxx_chip *chip) { if (chip->info->ops->pot_clear) @@ -2190,13 +2228,6 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; - /* Setup Switch Global 2 Registers */ - if (chip->info->global2_addr) { - err = mv88e6xxx_g2_setup(chip); - if (err) - goto unlock; - } - err = mv88e6xxx_irl_setup(chip); if (err) goto unlock; @@ -2233,6 +2264,14 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; + err = mv88e6xxx_trunk_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_devmap_setup(chip); + if (err) + goto unlock; + /* Setup PTP Hardware Clock and timestamping */ if (chip->info->ptp_support) { err = mv88e6xxx_ptp_setup(chip); diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 80490f66bc06..4163c8099d0b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -21,10 +21,6 @@ #include <linux/timecounter.h> #include <net/dsa.h> -#ifndef UINT64_MAX -#define UINT64_MAX (u64)(~((u64)0)) -#endif - #define SMI_CMD 0x00 #define SMI_CMD_BUSY BIT(15) #define SMI_CMD_CLAUSE_22 BIT(12) diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 0ce627fded48..e6d658181b27 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -119,37 +119,17 @@ int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) /* Offset 0x06: Device Mapping Table register */ -static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, - int target, int port) +int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target, + int port) { - u16 val = (target << 8) | (port & 0xf); + u16 val = (target << 8) | (port & 0x1f); + /* Modern chips use 5 bits to define a device mapping port, + * but bit 4 is reserved on older chips, so it is safe to use. 
+ */ return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_DEVICE_MAPPING, val); } -static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) -{ - int target, port; - int err; - - /* Initialize the routing port to the 32 possible target devices */ - for (target = 0; target < 32; ++target) { - port = 0xf; - - if (target < DSA_MAX_SWITCHES) { - port = chip->ds->rtable[target]; - if (port == DSA_RTABLE_NONE) - port = 0xf; - } - - err = mv88e6xxx_g2_device_mapping_write(chip, target, port); - if (err) - break; - } - - return err; -} - /* Offset 0x07: Trunk Mask Table register */ static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, @@ -174,7 +154,7 @@ static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_TRUNK_MAPPING, val); } -static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) +int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip) { const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; int i, err; @@ -1138,31 +1118,3 @@ void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip, for (phy = 0; phy < chip->info->num_internal_phys; phy++) irq_dispose_mapping(bus->irq[phy]); } - -int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) -{ - u16 reg; - int err; - - /* Ignore removed tag data on doubly tagged packets, disable - * flow control messages, force flow control priority to the - * highest, and send all special multicast frames to the CPU - * port at the highest priority. - */ - reg = MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI | (0x7 << 4); - err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, reg); - if (err) - return err; - - /* Program the DSA routing table. */ - err = mv88e6xxx_g2_set_device_mapping(chip); - if (err) - return err; - - /* Clear all trunk masks and mapping. 
*/ - err = mv88e6xxx_g2_clear_trunk(chip); - if (err) - return err; - - return 0; -} diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 520ec70d32e8..37e8ce2c72a0 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -60,7 +60,8 @@ #define MV88E6XXX_G2_DEVICE_MAPPING 0x06 #define MV88E6XXX_G2_DEVICE_MAPPING_UPDATE 0x8000 #define MV88E6XXX_G2_DEVICE_MAPPING_DEV_MASK 0x1f00 -#define MV88E6XXX_G2_DEVICE_MAPPING_PORT_MASK 0x000f +#define MV88E6352_G2_DEVICE_MAPPING_PORT_MASK 0x000f +#define MV88E6390_G2_DEVICE_MAPPING_PORT_MASK 0x001f /* Offset 0x07: Trunk Mask Table Register */ #define MV88E6XXX_G2_TRUNK_MASK 0x07 @@ -313,7 +314,6 @@ int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, int src_dev, int src_port, u16 data); int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip); -int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip); int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip); void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip); @@ -327,6 +327,11 @@ int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip); +int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip); + +int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target, + int port); + extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; @@ -441,11 +446,6 @@ static inline int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip) return -EOPNOTSUPP; } -static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) -{ - return -EOPNOTSUPP; -} - static inline int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip) { return -EOPNOTSUPP; @@ -495,6 +495,17 @@ static inline int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip, return -EOPNOTSUPP; } +static inline int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, + int target, int port) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ #endif /* _MV88E6XXX_GLOBAL2_H */ diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 600d5ad1fbde..757b6d90ea36 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -600,10 +600,13 @@ qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val) } static void -qca8k_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data) { int i; + if (stringset != ETH_SS_STATS) + return; + for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name, ETH_GSTRING_LEN); @@ -631,8 +634,11 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, } static int -qca8k_get_sset_count(struct dsa_switch *ds, int port) +qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset) { + if (sset != ETH_SS_STATS) + return 0; + return ARRAY_SIZE(ar8327_mib); } diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index 9fee7c83ef9f..f2f0264c58ba 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -29,8 +29,8 @@ config PCMCIA_AXNET called axnet_cs. If unsure, say N. 
config AX88796 - tristate "ASIX AX88796 NE2000 clone support" - depends on (ARM || MIPS || SUPERH) + tristate "ASIX AX88796 NE2000 clone support" if !ZORRO + depends on (ARM || MIPS || SUPERH || ZORRO || COMPILE_TEST) select CRC32 select PHYLIB select MDIO_BITBANG @@ -45,6 +45,19 @@ config AX88796_93CX6 ---help--- Select this if your platform comes with an external 93CX6 eeprom. +config XSURF100 + tristate "Amiga XSurf 100 AX88796/NE2000 clone support" + depends on ZORRO + select AX88796 + select ASIX_PHY + help + This driver is for the Individual Computers X-Surf 100 Ethernet + card (based on the Asix AX88796 chip). If you have such a card, + say Y. Otherwise, say N. + + To compile this driver as a module, choose M here: the module + will be called xsurf100. + config HYDRA tristate "Hydra support" depends on ZORRO diff --git a/drivers/net/ethernet/8390/Makefile b/drivers/net/ethernet/8390/Makefile index 1d650e66cc6e..85c83c566ec6 100644 --- a/drivers/net/ethernet/8390/Makefile +++ b/drivers/net/ethernet/8390/Makefile @@ -16,4 +16,5 @@ obj-$(CONFIG_PCMCIA_PCNET) += pcnet_cs.o 8390.o obj-$(CONFIG_STNIC) += stnic.o 8390.o obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o obj-$(CONFIG_WD80x3) += wd.o 8390.o +obj-$(CONFIG_XSURF100) += xsurf100.o obj-$(CONFIG_ZORRO8390) += zorro8390.o diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index da61cf3cb3a9..2a0ddec1dd56 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -163,6 +163,21 @@ static void ax_reset_8390(struct net_device *dev) ei_outb(ENISR_RESET, addr + EN0_ISR); /* Ack intr. */ } +/* Wrapper for __ei_interrupt for platforms that have a platform-specific + * way to find out whether the interrupt request might be caused by + * the ax88796 chip. + */ +static irqreturn_t ax_ei_interrupt_filtered(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct ax_device *ax = to_ax_dev(dev); + struct platform_device *pdev = to_platform_device(dev->dev.parent); + + if (!ax->plat->check_irq(pdev)) + return IRQ_NONE; + + return ax_ei_interrupt(irq, dev_id); +} static void ax_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page) @@ -387,6 +402,90 @@ static void ax_phy_switch(struct net_device *dev, int on) ei_outb(reg_gpoc, ei_local->mem + EI_SHIFT(0x17)); } +static void ax_bb_mdc(struct mdiobb_ctrl *ctrl, int level) +{ + struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); + + if (level) + ax->reg_memr |= AX_MEMR_MDC; + else + ax->reg_memr &= ~AX_MEMR_MDC; + + ei_outb(ax->reg_memr, ax->addr_memr); +} + +static void ax_bb_dir(struct mdiobb_ctrl *ctrl, int output) +{ + struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); + + if (output) + ax->reg_memr &= ~AX_MEMR_MDIR; + else + ax->reg_memr |= AX_MEMR_MDIR; + + ei_outb(ax->reg_memr, ax->addr_memr); +} + +static void ax_bb_set_data(struct mdiobb_ctrl *ctrl, int value) +{ + struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); + + if (value) + ax->reg_memr |= AX_MEMR_MDO; + else + ax->reg_memr &= ~AX_MEMR_MDO; + + ei_outb(ax->reg_memr, ax->addr_memr); +} + +static int ax_bb_get_data(struct mdiobb_ctrl *ctrl) +{ + struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); + int reg_memr = ei_inb(ax->addr_memr); + + return reg_memr & AX_MEMR_MDI ? 
1 : 0; +} + +static const struct mdiobb_ops bb_ops = { + .owner = THIS_MODULE, + .set_mdc = ax_bb_mdc, + .set_mdio_dir = ax_bb_dir, + .set_mdio_data = ax_bb_set_data, + .get_mdio_data = ax_bb_get_data, +}; + +static int ax_mii_init(struct net_device *dev) +{ + struct platform_device *pdev = to_platform_device(dev->dev.parent); + struct ei_device *ei_local = netdev_priv(dev); + struct ax_device *ax = to_ax_dev(dev); + int err; + + ax->bb_ctrl.ops = &bb_ops; + ax->addr_memr = ei_local->mem + AX_MEMR; + ax->mii_bus = alloc_mdio_bitbang(&ax->bb_ctrl); + if (!ax->mii_bus) { + err = -ENOMEM; + goto out; + } + + ax->mii_bus->name = "ax88796_mii_bus"; + ax->mii_bus->parent = dev->dev.parent; + snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); + + err = mdiobus_register(ax->mii_bus); + if (err) + goto out_free_mdio_bitbang; + + return 0; + + out_free_mdio_bitbang: + free_mdio_bitbang(ax->mii_bus); + out: + return err; +} + static int ax_open(struct net_device *dev) { struct ax_device *ax = to_ax_dev(dev); @@ -394,8 +493,16 @@ static int ax_open(struct net_device *dev) netdev_dbg(dev, "open\n"); - ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags, - dev->name, dev); + ret = ax_mii_init(dev); + if (ret) + goto failed_mii; + + if (ax->plat->check_irq) + ret = request_irq(dev->irq, ax_ei_interrupt_filtered, + ax->irqflags, dev->name, dev); + else + ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags, + dev->name, dev); if (ret) goto failed_request_irq; @@ -421,6 +528,10 @@ static int ax_open(struct net_device *dev) ax_phy_switch(dev, 0); free_irq(dev->irq, dev); failed_request_irq: + /* unregister mdiobus */ + mdiobus_unregister(ax->mii_bus); + free_mdio_bitbang(ax->mii_bus); + failed_mii: return ret; } @@ -440,6 +551,9 @@ static int ax_close(struct net_device *dev) phy_disconnect(dev->phydev); free_irq(dev->irq, dev); + + mdiobus_unregister(ax->mii_bus); + free_mdio_bitbang(ax->mii_bus); return 0; } @@ -539,92 +653,8 @@ static const struct net_device_ops ax_netdev_ops = { #endif }; -static void ax_bb_mdc(struct mdiobb_ctrl *ctrl, int level) -{ - struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); - - if (level) - ax->reg_memr |= AX_MEMR_MDC; - else - ax->reg_memr &= ~AX_MEMR_MDC; - - ei_outb(ax->reg_memr, ax->addr_memr); -} - -static void ax_bb_dir(struct mdiobb_ctrl *ctrl, int output) -{ - struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); - - if (output) - ax->reg_memr &= ~AX_MEMR_MDIR; - else - ax->reg_memr |= AX_MEMR_MDIR; - - ei_outb(ax->reg_memr, ax->addr_memr); -} - -static void ax_bb_set_data(struct mdiobb_ctrl *ctrl, int value) -{ - struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); - - if (value) - ax->reg_memr |= AX_MEMR_MDO; - else - ax->reg_memr &= ~AX_MEMR_MDO; - - ei_outb(ax->reg_memr, ax->addr_memr); -} - -static int ax_bb_get_data(struct mdiobb_ctrl *ctrl) -{ - struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl); - int reg_memr = ei_inb(ax->addr_memr); - - return reg_memr & AX_MEMR_MDI ? 
1 : 0; -} - -static const struct mdiobb_ops bb_ops = { - .owner = THIS_MODULE, - .set_mdc = ax_bb_mdc, - .set_mdio_dir = ax_bb_dir, - .set_mdio_data = ax_bb_set_data, - .get_mdio_data = ax_bb_get_data, -}; - /* setup code */ -static int ax_mii_init(struct net_device *dev) -{ - struct platform_device *pdev = to_platform_device(dev->dev.parent); - struct ei_device *ei_local = netdev_priv(dev); - struct ax_device *ax = to_ax_dev(dev); - int err; - - ax->bb_ctrl.ops = &bb_ops; - ax->addr_memr = ei_local->mem + AX_MEMR; - ax->mii_bus = alloc_mdio_bitbang(&ax->bb_ctrl); - if (!ax->mii_bus) { - err = -ENOMEM; - goto out; - } - - ax->mii_bus->name = "ax88796_mii_bus"; - ax->mii_bus->parent = dev->dev.parent; - snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", - pdev->name, pdev->id); - - err = mdiobus_register(ax->mii_bus); - if (err) - goto out_free_mdio_bitbang; - - return 0; - - out_free_mdio_bitbang: - free_mdio_bitbang(ax->mii_bus); - out: - return err; -} - static void ax_initial_setup(struct net_device *dev, struct ei_device *ei_local) { void __iomem *ioaddr = ei_local->mem; @@ -669,10 +699,16 @@ static int ax_init_dev(struct net_device *dev) if (ax->plat->flags & AXFLG_HAS_EEPROM) { unsigned char SA_prom[32]; + ei_outb(6, ioaddr + EN0_RCNTLO); + ei_outb(0, ioaddr + EN0_RCNTHI); + ei_outb(0, ioaddr + EN0_RSARLO); + ei_outb(0, ioaddr + EN0_RSARHI); + ei_outb(E8390_RREAD + E8390_START, ioaddr + NE_CMD); for (i = 0; i < sizeof(SA_prom); i += 2) { SA_prom[i] = ei_inb(ioaddr + NE_DATAPORT); SA_prom[i + 1] = ei_inb(ioaddr + NE_DATAPORT); } + ei_outb(ENISR_RDC, ioaddr + EN0_ISR); /* Ack intr. */ if (ax->plat->wordlength == 2) for (i = 0; i < 16; i++) @@ -741,18 +777,20 @@ static int ax_init_dev(struct net_device *dev) #endif ei_local->reset_8390 = &ax_reset_8390; - ei_local->block_input = &ax_block_input; - ei_local->block_output = &ax_block_output; + if (ax->plat->block_input) + ei_local->block_input = ax->plat->block_input; + else + ei_local->block_input = &ax_block_input; + if (ax->plat->block_output) + ei_local->block_output = ax->plat->block_output; + else + ei_local->block_output = &ax_block_output; ei_local->get_8390_hdr = &ax_get_8390_hdr; ei_local->priv = 0; dev->netdev_ops = &ax_netdev_ops; dev->ethtool_ops = &ax_ethtool_ops; - ret = ax_mii_init(dev); - if (ret) - goto err_out; - ax_NS8390_init(dev, 0); ret = register_netdev(dev); @@ -777,7 +815,6 @@ static int ax_remove(struct platform_device *pdev) struct resource *mem; unregister_netdev(dev); - free_irq(dev->irq, dev); iounmap(ei_local->mem); mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -789,6 +826,7 @@ static int ax_remove(struct platform_device *pdev) release_mem_region(mem->start, resource_size(mem)); } + platform_set_drvdata(pdev, NULL); free_netdev(dev); return 0; @@ -835,6 +873,9 @@ static int ax_probe(struct platform_device *pdev) dev->irq = irq->start; ax->irqflags = irq->flags & IRQF_TRIGGER_MASK; + if (irq->flags & IORESOURCE_IRQ_SHAREABLE) + ax->irqflags |= IRQF_SHARED; + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!mem) { dev_err(&pdev->dev, "no MEM specified\n"); @@ -919,6 +960,7 @@ static int ax_probe(struct platform_device *pdev) release_mem_region(mem->start, mem_size); exit_mem: + platform_set_drvdata(pdev, NULL); free_netdev(dev); return ret; diff --git a/drivers/net/ethernet/8390/xsurf100.c b/drivers/net/ethernet/8390/xsurf100.c new file mode 100644 index 000000000000..e2c963821ffe --- /dev/null +++ b/drivers/net/ethernet/8390/xsurf100.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: 
GPL-2.0 +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/zorro.h> +#include <net/ax88796.h> +#include <asm/amigaints.h> + +#define ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100 \ + ZORRO_ID(INDIVIDUAL_COMPUTERS, 0x64, 0) + +#define XS100_IRQSTATUS_BASE 0x40 +#define XS100_8390_BASE 0x800 + +/* Longword-access area. Translated to 2 16-bit access cycles by the + * X-Surf 100 FPGA + */ +#define XS100_8390_DATA32_BASE 0x8000 +#define XS100_8390_DATA32_SIZE 0x2000 +/* Sub-Areas for fast data register access; addresses relative to area begin */ +#define XS100_8390_DATA_READ32_BASE 0x0880 +#define XS100_8390_DATA_WRITE32_BASE 0x0C80 +#define XS100_8390_DATA_AREA_SIZE 0x80 + +#define __NS8390_init ax_NS8390_init + +/* force unsigned long back to 'void __iomem *' */ +#define ax_convert_addr(_a) ((void __force __iomem *)(_a)) + +#define ei_inb(_a) z_readb(ax_convert_addr(_a)) +#define ei_outb(_v, _a) z_writeb(_v, ax_convert_addr(_a)) + +#define ei_inw(_a) z_readw(ax_convert_addr(_a)) +#define ei_outw(_v, _a) z_writew(_v, ax_convert_addr(_a)) + +#define ei_inb_p(_a) ei_inb(_a) +#define ei_outb_p(_v, _a) ei_outb(_v, _a) + +/* define EI_SHIFT() to take into account our register offsets */ +#define EI_SHIFT(x) (ei_local->reg_offset[(x)]) + +/* Ensure we have our RCR base value */ +#define AX88796_PLATFORM + +static unsigned char version[] = + "ax88796.c: Copyright 2005,2007 Simtec Electronics\n"; + +#include "lib8390.c" + +/* from ne.c */ +#define NE_CMD EI_SHIFT(0x00) +#define NE_RESET EI_SHIFT(0x1f) +#define NE_DATAPORT EI_SHIFT(0x10) + +struct xsurf100_ax_plat_data { + struct ax_plat_data ax; + void __iomem *base_regs; + void __iomem *data_area; +}; + +static int is_xsurf100_network_irq(struct platform_device *pdev) +{ + struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev); + + return (readw(xs100->base_regs + XS100_IRQSTATUS_BASE) & 0xaaaa) != 0; +} + +/* These functions guarantee that the iomem is accessed with 32 bit + * cycles only. 
z_memcpy_fromio / z_memcpy_toio don't + */ +static void z_memcpy_fromio32(void *dst, const void __iomem *src, size_t bytes) +{ + while (bytes > 32) { + asm __volatile__ + ("movem.l (%0)+,%%d0-%%d7\n" + "movem.l %%d0-%%d7,(%1)\n" + "adda.l #32,%1" : "=a"(src), "=a"(dst) + : "0"(src), "1"(dst) : "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "memory"); + bytes -= 32; + } + while (bytes) { + *(uint32_t *)dst = z_readl(src); + src += 4; + dst += 4; + bytes -= 4; + } +} + +static void z_memcpy_toio32(void __iomem *dst, const void *src, size_t bytes) +{ + while (bytes) { + z_writel(*(const uint32_t *)src, dst); + src += 4; + dst += 4; + bytes -= 4; + } +} + +static void xs100_write(struct net_device *dev, const void *src, + unsigned int count) +{ + struct ei_device *ei_local = netdev_priv(dev); + struct platform_device *pdev = to_platform_device(dev->dev.parent); + struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev); + + /* copy whole blocks */ + while (count > XS100_8390_DATA_AREA_SIZE) { + z_memcpy_toio32(xs100->data_area + + XS100_8390_DATA_WRITE32_BASE, src, + XS100_8390_DATA_AREA_SIZE); + src += XS100_8390_DATA_AREA_SIZE; + count -= XS100_8390_DATA_AREA_SIZE; + } + /* copy whole dwords */ + z_memcpy_toio32(xs100->data_area + XS100_8390_DATA_WRITE32_BASE, + src, count & ~3); + src += count & ~3; + if (count & 2) { + ei_outw(*(uint16_t *)src, ei_local->mem + NE_DATAPORT); + src += 2; + } + if (count & 1) + ei_outb(*(uint8_t *)src, ei_local->mem + NE_DATAPORT); +} + +static void xs100_read(struct net_device *dev, void *dst, unsigned int count) +{ + struct ei_device *ei_local = netdev_priv(dev); + struct platform_device *pdev = to_platform_device(dev->dev.parent); + struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev); + + /* copy whole blocks */ + while (count > XS100_8390_DATA_AREA_SIZE) { + z_memcpy_fromio32(dst, xs100->data_area + + XS100_8390_DATA_READ32_BASE, + XS100_8390_DATA_AREA_SIZE); + dst += XS100_8390_DATA_AREA_SIZE; + count -= XS100_8390_DATA_AREA_SIZE; + } + /* copy whole dwords */ + z_memcpy_fromio32(dst, xs100->data_area + XS100_8390_DATA_READ32_BASE, + count & ~3); + dst += count & ~3; + if (count & 2) { + *(uint16_t *)dst = ei_inw(ei_local->mem + NE_DATAPORT); + dst += 2; + } + if (count & 1) + *(uint8_t *)dst = ei_inb(ei_local->mem + NE_DATAPORT); +} + +/* Block input and output, similar to the Crynwr packet driver. If + * you are porting to a new ethercard, look at the packet driver + * source for hints. The NEx000 doesn't share the on-board packet + * memory -- you have to put the packet out through the "remote DMA" + * dataport using ei_outb. 
+ */ +static void xs100_block_input(struct net_device *dev, int count, + struct sk_buff *skb, int ring_offset) +{ + struct ei_device *ei_local = netdev_priv(dev); + void __iomem *nic_base = ei_local->mem; + char *buf = skb->data; + + if (ei_local->dmaing) { + netdev_err(dev, + "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n", + __func__, + ei_local->dmaing, ei_local->irqlock); + return; + } + + ei_local->dmaing |= 0x01; + + ei_outb(E8390_NODMA + E8390_PAGE0 + E8390_START, nic_base + NE_CMD); + ei_outb(count & 0xff, nic_base + EN0_RCNTLO); + ei_outb(count >> 8, nic_base + EN0_RCNTHI); + ei_outb(ring_offset & 0xff, nic_base + EN0_RSARLO); + ei_outb(ring_offset >> 8, nic_base + EN0_RSARHI); + ei_outb(E8390_RREAD + E8390_START, nic_base + NE_CMD); + + xs100_read(dev, buf, count); + + ei_local->dmaing &= ~1; +} + +static void xs100_block_output(struct net_device *dev, int count, + const unsigned char *buf, const int start_page) +{ + struct ei_device *ei_local = netdev_priv(dev); + void __iomem *nic_base = ei_local->mem; + unsigned long dma_start; + + /* Round the count up for word writes. Do we need to do this? + * What effect will an odd byte count have on the 8390? I + * should check someday. + */ + if (ei_local->word16 && (count & 0x01)) + count++; + + /* This *shouldn't* happen. If it does, it's the last thing + * you'll see + */ + if (ei_local->dmaing) { + netdev_err(dev, + "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n", + __func__, + ei_local->dmaing, ei_local->irqlock); + return; + } + + ei_local->dmaing |= 0x01; + /* We should already be in page 0, but to be safe... */ + ei_outb(E8390_PAGE0 + E8390_START + E8390_NODMA, nic_base + NE_CMD); + + ei_outb(ENISR_RDC, nic_base + EN0_ISR); + + /* Now the normal output. */ + ei_outb(count & 0xff, nic_base + EN0_RCNTLO); + ei_outb(count >> 8, nic_base + EN0_RCNTHI); + ei_outb(0x00, nic_base + EN0_RSARLO); + ei_outb(start_page, nic_base + EN0_RSARHI); + + ei_outb(E8390_RWRITE + E8390_START, nic_base + NE_CMD); + + xs100_write(dev, buf, count); + + dma_start = jiffies; + + while ((ei_inb(nic_base + EN0_ISR) & ENISR_RDC) == 0) { + if (jiffies - dma_start > 2 * HZ / 100) { /* 20ms */ + netdev_warn(dev, "timeout waiting for Tx RDC.\n"); + ei_local->reset_8390(dev); + ax_NS8390_init(dev, 1); + break; + } + } + + ei_outb(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */ + ei_local->dmaing &= ~0x01; +} + +static int xsurf100_probe(struct zorro_dev *zdev, + const struct zorro_device_id *ent) +{ + struct platform_device *pdev; + struct xsurf100_ax_plat_data ax88796_data; + struct resource res[2] = { + DEFINE_RES_NAMED(IRQ_AMIGA_PORTS, 1, NULL, + IORESOURCE_IRQ | IORESOURCE_IRQ_SHAREABLE), + DEFINE_RES_MEM(zdev->resource.start + XS100_8390_BASE, + 4 * 0x20) + }; + int reg; + /* This table is referenced in the device structure, so it must + * outlive the scope of xsurf100_probe. + */ + static u32 reg_offsets[32]; + int ret = 0; + + /* X-Surf 100 control and 32 bit ring buffer data access areas. + * These resources are not used by the ax88796 driver, so must + * be requested here and passed via platform data. 
+ */ + + if (!request_mem_region(zdev->resource.start, 0x100, zdev->name)) { + dev_err(&zdev->dev, "cannot reserve X-Surf 100 control registers\n"); + return -ENXIO; + } + + if (!request_mem_region(zdev->resource.start + + XS100_8390_DATA32_BASE, + XS100_8390_DATA32_SIZE, + "X-Surf 100 32-bit data access")) { + dev_err(&zdev->dev, "cannot reserve 32-bit area\n"); + ret = -ENXIO; + goto exit_req; + } + + for (reg = 0; reg < 0x20; reg++) + reg_offsets[reg] = 4 * reg; + + memset(&ax88796_data, 0, sizeof(ax88796_data)); + ax88796_data.ax.flags = AXFLG_HAS_EEPROM; + ax88796_data.ax.wordlength = 2; + ax88796_data.ax.dcr_val = 0x48; + ax88796_data.ax.rcr_val = 0x40; + ax88796_data.ax.reg_offsets = reg_offsets; + ax88796_data.ax.check_irq = is_xsurf100_network_irq; + ax88796_data.base_regs = ioremap(zdev->resource.start, 0x100); + + /* error handling for ioremap regs */ + if (!ax88796_data.base_regs) { + dev_err(&zdev->dev, "Cannot ioremap area %pR (registers)\n", + &zdev->resource); + + ret = -ENXIO; + goto exit_req2; + } + + ax88796_data.data_area = ioremap(zdev->resource.start + + XS100_8390_DATA32_BASE, XS100_8390_DATA32_SIZE); + + /* error handling for ioremap data */ + if (!ax88796_data.data_area) { + dev_err(&zdev->dev, + "Cannot ioremap area %pR offset %x (32-bit access)\n", + &zdev->resource, XS100_8390_DATA32_BASE); + + ret = -ENXIO; + goto exit_mem; + } + + ax88796_data.ax.block_output = xs100_block_output; + ax88796_data.ax.block_input = xs100_block_input; + + pdev = platform_device_register_resndata(&zdev->dev, "ax88796", + zdev->slotaddr, res, 2, + &ax88796_data, + sizeof(ax88796_data)); + + if (IS_ERR(pdev)) { + dev_err(&zdev->dev, "cannot register platform device\n"); + ret = -ENXIO; + goto exit_mem2; + } + + zorro_set_drvdata(zdev, pdev); + + if (!ret) + return 0; + + exit_mem2: + iounmap(ax88796_data.data_area); + + exit_mem: + iounmap(ax88796_data.base_regs); + + exit_req2: + release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE, + XS100_8390_DATA32_SIZE); + + exit_req: + release_mem_region(zdev->resource.start, 0x100); + + return ret; +} + +static void xsurf100_remove(struct zorro_dev *zdev) +{ + struct platform_device *pdev = zorro_get_drvdata(zdev); + struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev); + + platform_device_unregister(pdev); + + iounmap(xs100->base_regs); + release_mem_region(zdev->resource.start, 0x100); + iounmap(xs100->data_area); + release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE, + XS100_8390_DATA32_SIZE); +} + +static const struct zorro_device_id xsurf100_zorro_tbl[] = { + { ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100, }, + { 0 } +}; + +MODULE_DEVICE_TABLE(zorro, xsurf100_zorro_tbl); + +static struct zorro_driver xsurf100_driver = { + .name = "xsurf100", + .id_table = xsurf100_zorro_tbl, + .probe = xsurf100_probe, + .remove = xsurf100_remove, +}; + +module_driver(xsurf100_driver, zorro_register_driver, zorro_unregister_driver); + +MODULE_DESCRIPTION("X-Surf 100 driver"); +MODULE_AUTHOR("Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index c99e3e845ac0..a90080f12e67 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1074,16 +1074,12 @@ static int amd8111e_calc_coalesce(struct net_device *dev) amd8111e_set_coalesce(dev,TX_INTR_COAL); coal_conf->tx_coal_type = MEDIUM_COALESCE; } - - } - else if(tx_pkt_size >= 1024){ - if (tx_pkt_size >= 1024){ - 
if(coal_conf->tx_coal_type != HIGH_COALESCE){ - coal_conf->tx_timeout = 4; - coal_conf->tx_event_count = 8; - amd8111e_set_coalesce(dev,TX_INTR_COAL); - coal_conf->tx_coal_type = HIGH_COALESCE; - } + } else if (tx_pkt_size >= 1024) { + if (coal_conf->tx_coal_type != HIGH_COALESCE) { + coal_conf->tx_timeout = 4; + coal_conf->tx_event_count = 8; + amd8111e_set_coalesce(dev, TX_INTR_COAL); + coal_conf->tx_coal_type = HIGH_COALESCE; } } } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f33b25fbca63..d5fca2e5a9bc 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -654,7 +654,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev, pkts = priv->rx_max_coalesced_frames; if (ec->use_adaptive_rx_coalesce && !priv->dim.use_dim) { - moder = net_dim_get_def_profile(priv->dim.dim.mode); + moder = net_dim_get_def_rx_moderation(priv->dim.dim.mode); usecs = moder.usec; pkts = moder.pkts; } @@ -1064,7 +1064,7 @@ static void bcm_sysport_dim_work(struct work_struct *work) struct bcm_sysport_priv *priv = container_of(ndim, struct bcm_sysport_priv, dim); struct net_dim_cq_moder cur_profile = - net_dim_get_profile(dim->mode, dim->profile_ix); + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts); dim->state = NET_DIM_START_MEASURE; @@ -1437,7 +1437,7 @@ static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv) /* If DIM was enabled, re-apply default parameters */ if (dim->use_dim) { - moder = net_dim_get_def_profile(dim->dim.mode); + moder = net_dim_get_def_rx_moderation(dim->dim.mode); usecs = moder.usec; pkts = moder.pkts; } diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile index 7c560d545c03..5a779b19d149 100644 --- a/drivers/net/ethernet/broadcom/bnxt/Makefile +++ b/drivers/net/ethernet/broadcom/bnxt/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_BNXT) += bnxt_en.o bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o +bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f83769d8047b..efe5c7203f60 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -62,6 +62,7 @@ #include "bnxt_vfr.h" #include "bnxt_tc.h" #include "bnxt_devlink.h" +#include "bnxt_debugfs.h" #define BNXT_TX_TIMEOUT (5 * HZ) @@ -2383,6 +2384,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) for (i = 0, j = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; struct bnxt_ring_struct *ring; + u8 qidx; ring = &txr->tx_ring_struct; @@ -2411,7 +2413,8 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) memset(txr->tx_push, 0, sizeof(struct tx_push_bd)); } - ring->queue_id = bp->q_info[j].queue_id; + qidx = bp->tc_to_qidx[j]; + ring->queue_id = bp->q_info[qidx].queue_id; if (i < bp->tx_nr_rings_xdp) continue; if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1)) @@ -3493,15 +3496,29 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, if (!timeout) timeout = DFLT_HWRM_CMD_TIMEOUT; + /* convert timeout to usec */ + timeout *= 1000; i = 0; - tmo_count = timeout * 40; + /* Short timeout for the first few iterations: + * number of loops = number of loops for short timeout + + * 
number of loops for standard timeout. + */ + tmo_count = HWRM_SHORT_TIMEOUT_COUNTER; + timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER; + tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT); resp_len = bp->hwrm_cmd_resp_addr + HWRM_RESP_LEN_OFFSET; if (intr_process) { /* Wait until hwrm response cmpl interrupt is processed */ while (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID && i++ < tmo_count) { - usleep_range(25, 40); + /* on first few passes, just barely sleep */ + if (i < HWRM_SHORT_TIMEOUT_COUNTER) + usleep_range(HWRM_SHORT_MIN_TIMEOUT, + HWRM_SHORT_MAX_TIMEOUT); + else + usleep_range(HWRM_MIN_TIMEOUT, + HWRM_MAX_TIMEOUT); } if (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID) { @@ -3519,7 +3536,13 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, HWRM_RESP_LEN_SFT; if (len) break; - usleep_range(25, 40); + /* on first few passes, just barely sleep */ + if (i < DFLT_HWRM_CMD_TIMEOUT) + usleep_range(HWRM_SHORT_MIN_TIMEOUT, + HWRM_SHORT_MAX_TIMEOUT); + else + usleep_range(HWRM_MIN_TIMEOUT, + HWRM_MAX_TIMEOUT); } if (i >= tmo_count) { @@ -4334,26 +4357,9 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, mutex_unlock(&bp->hwrm_cmd_lock); if (rc || err) { - switch (ring_type) { - case RING_FREE_REQ_RING_TYPE_L2_CMPL: - netdev_err(bp->dev, "hwrm_ring_alloc cp failed. rc:%x err:%x\n", - rc, err); - return -1; - - case RING_FREE_REQ_RING_TYPE_RX: - netdev_err(bp->dev, "hwrm_ring_alloc rx failed. rc:%x err:%x\n", - rc, err); - return -1; - - case RING_FREE_REQ_RING_TYPE_TX: - netdev_err(bp->dev, "hwrm_ring_alloc tx failed. rc:%x err:%x\n", - rc, err); - return -1; - - default: - netdev_err(bp->dev, "Invalid ring\n"); - return -1; - } + netdev_err(bp->dev, "hwrm_ring_alloc type %d failed. rc:%x err:%x\n", + ring_type, rc, err); + return -EIO; } ring->fw_ring_id = ring_id; return rc; @@ -4477,23 +4483,9 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp, mutex_unlock(&bp->hwrm_cmd_lock); if (rc || error_code) { - switch (ring_type) { - case RING_FREE_REQ_RING_TYPE_L2_CMPL: - netdev_err(bp->dev, "hwrm_ring_free cp failed. rc:%d\n", - rc); - return rc; - case RING_FREE_REQ_RING_TYPE_RX: - netdev_err(bp->dev, "hwrm_ring_free rx failed. rc:%d\n", - rc); - return rc; - case RING_FREE_REQ_RING_TYPE_TX: - netdev_err(bp->dev, "hwrm_ring_free tx failed. rc:%d\n", - rc); - return rc; - default: - netdev_err(bp->dev, "Invalid ring\n"); - return -1; - } + netdev_err(bp->dev, "hwrm_ring_free type %d failed. 
rc:%x err:%x\n", + ring_type, rc, error_code); + return -EIO; } return 0; } @@ -4721,6 +4713,10 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps, cp_rings, vnics); + req.enables |= cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS | + FUNC_VF_CFG_REQ_ENABLES_NUM_L2_CTXS); + req.num_rsscos_ctxs = cpu_to_le16(BNXT_VF_MAX_RSS_CTX); + req.num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX); rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) return -ENOMEM; @@ -5309,6 +5305,7 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp) for (i = 0; i < bp->max_tc; i++) { bp->q_info[i].queue_id = *qptr++; bp->q_info[i].queue_profile = *qptr++; + bp->tc_to_qidx[i] = i; } qportcfg_exit: @@ -5376,7 +5373,8 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp) struct tm tm; time64_t now = ktime_get_real_seconds(); - if (bp->hwrm_spec_code < 0x10400) + if ((BNXT_VF(bp) && bp->hwrm_spec_code < 0x10901) || + bp->hwrm_spec_code < 0x10400) return -EOPNOTSUPP; time64_to_tm(now, 0, &tm); @@ -5958,6 +5956,9 @@ static int bnxt_init_msix(struct bnxt *bp) if (total_vecs > max) total_vecs = max; + if (!total_vecs) + return 0; + msix_ent = kcalloc(total_vecs, sizeof(struct msix_entry), GFP_KERNEL); if (!msix_ent) return -ENOMEM; @@ -6843,6 +6844,8 @@ static void bnxt_preset_reg_win(struct bnxt *bp) } } +static int bnxt_init_dflt_ring_mode(struct bnxt *bp); + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -6850,6 +6853,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) bnxt_preset_reg_win(bp); netif_carrier_off(bp->dev); if (irq_re_init) { + /* Reserve rings now if none were reserved at driver probe. */ + rc = bnxt_init_dflt_ring_mode(bp); + if (rc) { + netdev_err(bp->dev, "Failed to reserve default rings at open\n"); + return rc; + } rc = bnxt_reserve_rings(bp); if (rc) return rc; @@ -6877,6 +6886,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) } bnxt_enable_napi(bp); + bnxt_debug_dev_init(bp); rc = bnxt_init_nic(bp, irq_re_init); if (rc) { @@ -6909,6 +6919,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) return 0; open_err: + bnxt_debug_dev_exit(bp); bnxt_disable_napi(bp); bnxt_del_napi(bp); @@ -7002,6 +7013,7 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, /* TODO CHIMP_FW: Link/PHY related cleanup if (link_re_init) */ + bnxt_debug_dev_exit(bp); bnxt_disable_napi(bp); del_timer_sync(&bp->timer); bnxt_free_skbs(bp); @@ -7279,6 +7291,25 @@ skip_uc: return rc; } +static bool bnxt_can_reserve_rings(struct bnxt *bp) +{ +#ifdef CONFIG_BNXT_SRIOV + if ((bp->flags & BNXT_FLAG_NEW_RM) && BNXT_VF(bp)) { + struct bnxt_hw_resc *hw_resc = &bp->hw_resc; + + /* No minimum rings were provisioned by the PF. Don't + * reserve rings by default when device is down. 
+ */ + if (hw_resc->min_tx_rings || hw_resc->resv_tx_rings) + return true; + + if (!netif_running(bp->dev)) + return false; + } +#endif + return true; +} + /* If the chip and firmware supports RFS */ static bool bnxt_rfs_supported(struct bnxt *bp) { @@ -7295,7 +7326,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL int vnics, max_vnics, max_rss_ctxs; - if (!(bp->flags & BNXT_FLAG_MSIX_CAP)) + if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp)) return false; vnics = 1 + bp->rx_nr_rings; @@ -7729,7 +7760,7 @@ static void bnxt_init_dflt_coal(struct bnxt *bp) coal->coal_bufs = 30; coal->coal_ticks_irq = 1; coal->coal_bufs_irq = 2; - coal->idle_thresh = 25; + coal->idle_thresh = 50; coal->bufs_per_record = 2; coal->budget = 64; /* NAPI budget */ @@ -8529,6 +8560,9 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) { int dflt_rings, max_rx_rings, max_tx_rings, rc; + if (!bnxt_can_reserve_rings(bp)) + return 0; + if (sh) bp->flags |= BNXT_FLAG_SHARED_RINGS; dflt_rings = netif_get_num_default_rss_queues(); @@ -8574,6 +8608,29 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) return rc; } +static int bnxt_init_dflt_ring_mode(struct bnxt *bp) +{ + int rc; + + if (bp->tx_nr_rings) + return 0; + + rc = bnxt_set_dflt_rings(bp, true); + if (rc) { + netdev_err(bp->dev, "Not enough rings available.\n"); + return rc; + } + rc = bnxt_init_int_mode(bp); + if (rc) + return rc; + bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) { + bp->flags |= BNXT_FLAG_RFS; + bp->dev->features |= NETIF_F_NTUPLE; + } + return 0; +} + int bnxt_restore_pf_fw_resources(struct bnxt *bp) { int rc; @@ -9078,6 +9135,7 @@ static struct pci_driver bnxt_pci_driver = { static int __init bnxt_init(void) { + bnxt_debug_init(); return pci_register_driver(&bnxt_pci_driver); } @@ -9086,6 +9144,7 @@ static void __exit bnxt_exit(void) pci_unregister_driver(&bnxt_pci_driver); if (bnxt_pf_wq) destroy_workqueue(bnxt_pf_wq); + bnxt_debug_exit(); } module_init(bnxt_init); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 3d55d3b56865..8df1d8b9d2e3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -532,6 +532,12 @@ struct rx_tpa_end_cmp_ext { #define BNXT_HWRM_REQ_MAX_SIZE 128 #define BNXT_HWRM_REQS_PER_PAGE (BNXT_PAGE_SIZE / \ BNXT_HWRM_REQ_MAX_SIZE) +#define HWRM_SHORT_MIN_TIMEOUT 3 +#define HWRM_SHORT_MAX_TIMEOUT 10 +#define HWRM_SHORT_TIMEOUT_COUNTER 5 + +#define HWRM_MIN_TIMEOUT 25 +#define HWRM_MAX_TIMEOUT 40 #define BNXT_RX_EVENT 1 #define BNXT_AGG_EVENT 2 @@ -1242,6 +1248,7 @@ struct bnxt { u8 max_tc; u8 max_lltc; /* lossless TCs */ struct bnxt_queue_info q_info[BNXT_MAX_QUEUE]; + u8 tc_to_qidx[BNXT_MAX_QUEUE]; unsigned int current_interval; #define BNXT_TIMER_INTERVAL HZ @@ -1384,6 +1391,8 @@ struct bnxt { u16 *cfa_code_map; /* cfa_code -> vf_idx map */ u8 switch_id[8]; struct bnxt_tc_info *tc_info; + struct dentry *debugfs_pdev; + struct dentry *debugfs_dim; }; #define BNXT_RX_STATS_OFFSET(counter) \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 3c746f2d9ed8..d5bc72cecde3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -21,6 +21,21 @@ #include "bnxt_dcb.h" #ifdef CONFIG_BNXT_DCB +static int bnxt_queue_to_tc(struct bnxt *bp, u8 queue_id) +{ + int i, j; + + for (i = 0; i < bp->max_tc; i++) { + 
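
The new HWRM constants above drive a two-tier polling loop: the first HWRM_SHORT_TIMEOUT_COUNTER passes sleep a few microseconds, the remainder sleep 25-40 us. A minimal sketch of the loop-count arithmetic, assuming the usual 500 ms value for DFLT_HWRM_CMD_TIMEOUT (its definition is not shown in this diff):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)		(((n) + (d) - 1) / (d))

#define HWRM_SHORT_MIN_TIMEOUT		3	/* us */
#define HWRM_SHORT_TIMEOUT_COUNTER	5
#define HWRM_MIN_TIMEOUT		25	/* us */
#define DFLT_HWRM_CMD_TIMEOUT		500	/* ms; assumed default */

int main(void)
{
	unsigned int timeout = DFLT_HWRM_CMD_TIMEOUT * 1000;	/* -> usec */
	unsigned int tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;

	timeout -= HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
	tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);

	/* 5 short passes + DIV_ROUND_UP(499985, 25) = 20005 total passes */
	printf("polling iterations: %u\n", tmo_count);
	return 0;
}
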
if (bp->q_info[i].queue_id == queue_id) { + for (j = 0; j < bp->max_tc; j++) { + if (bp->tc_to_qidx[j] == i) + return j; + } + } + } + return -EINVAL; +} + static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets) { struct hwrm_queue_pri2cos_cfg_input req = {0}; @@ -33,10 +48,13 @@ static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets) pri2cos = &req.pri0_cos_queue_id; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + u8 qidx; + req.enables |= cpu_to_le32( QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID << i); - pri2cos[i] = bp->q_info[ets->prio_tc[i]].queue_id; + qidx = bp->tc_to_qidx[ets->prio_tc[i]]; + pri2cos[i] = bp->q_info[qidx].queue_id; } rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); return rc; @@ -55,17 +73,15 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets) rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) { u8 *pri2cos = &resp->pri0_cos_queue_id; - int i, j; + int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { u8 queue_id = pri2cos[i]; + int tc; - for (j = 0; j < bp->max_tc; j++) { - if (bp->q_info[j].queue_id == queue_id) { - ets->prio_tc[i] = j; - break; - } - } + tc = bnxt_queue_to_tc(bp, queue_id); + if (tc >= 0) + ets->prio_tc[i] = tc; } } mutex_unlock(&bp->hwrm_cmd_lock); @@ -81,13 +97,15 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets, void *data; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1); - data = &req.unused_0; - for (i = 0; i < max_tc; i++, data += sizeof(cos2bw) - 4) { + for (i = 0; i < max_tc; i++) { + u8 qidx; + req.enables |= cpu_to_le32( QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i); memset(&cos2bw, 0, sizeof(cos2bw)); - cos2bw.queue_id = bp->q_info[i].queue_id; + qidx = bp->tc_to_qidx[i]; + cos2bw.queue_id = bp->q_info[qidx].queue_id; if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) { cos2bw.tsa = QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP; @@ -103,8 +121,9 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets, cpu_to_le32((ets->tc_tx_bw[i] * 100) | BW_VALUE_UNIT_PERCENT1_100); } + data = &req.unused_0 + qidx * (sizeof(cos2bw) - 4); memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4); - if (i == 0) { + if (qidx == 0) { req.queue_id0 = cos2bw.queue_id; req.unused_0 = 0; } @@ -132,66 +151,81 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets) data = &resp->queue_id0 + offsetof(struct bnxt_cos2bw_cfg, queue_id); for (i = 0; i < bp->max_tc; i++, data += sizeof(cos2bw) - 4) { - int j; + int tc; memcpy(&cos2bw.queue_id, data, sizeof(cos2bw) - 4); if (i == 0) cos2bw.queue_id = resp->queue_id0; - for (j = 0; j < bp->max_tc; j++) { - if (bp->q_info[j].queue_id != cos2bw.queue_id) - continue; - if (cos2bw.tsa == - QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP) { - ets->tc_tsa[j] = IEEE_8021QAZ_TSA_STRICT; - } else { - ets->tc_tsa[j] = IEEE_8021QAZ_TSA_ETS; - ets->tc_tx_bw[j] = cos2bw.bw_weight; - } + tc = bnxt_queue_to_tc(bp, cos2bw.queue_id); + if (tc < 0) + continue; + + if (cos2bw.tsa == + QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP) { + ets->tc_tsa[tc] = IEEE_8021QAZ_TSA_STRICT; + } else { + ets->tc_tsa[tc] = IEEE_8021QAZ_TSA_ETS; + ets->tc_tx_bw[tc] = cos2bw.bw_weight; } } mutex_unlock(&bp->hwrm_cmd_lock); return 0; } -static int bnxt_hwrm_queue_cfg(struct bnxt *bp, unsigned int lltc_mask) +static int bnxt_queue_remap(struct bnxt *bp, unsigned int lltc_mask) { - struct hwrm_queue_cfg_input req = {0}; - int i; + unsigned long qmap = 
0; + int max = bp->max_tc; + int i, j, rc; - if (netif_running(bp->dev)) - bnxt_tx_disable(bp); + /* Assign lossless TCs first */ + for (i = 0, j = 0; i < max; ) { + if (lltc_mask & (1 << i)) { + if (BNXT_LLQ(bp->q_info[j].queue_profile)) { + bp->tc_to_qidx[i] = j; + __set_bit(j, &qmap); + i++; + } + j++; + continue; + } + i++; + } - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_CFG, -1, -1); - req.flags = cpu_to_le32(QUEUE_CFG_REQ_FLAGS_PATH_BIDIR); - req.enables = cpu_to_le32(QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE); + for (i = 0, j = 0; i < max; i++) { + if (lltc_mask & (1 << i)) + continue; + j = find_next_zero_bit(&qmap, max, j); + bp->tc_to_qidx[i] = j; + __set_bit(j, &qmap); + j++; + } - /* Configure lossless queues to lossy first */ - req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY; - for (i = 0; i < bp->max_tc; i++) { - if (BNXT_LLQ(bp->q_info[i].queue_profile)) { - req.queue_id = cpu_to_le32(bp->q_info[i].queue_id); - hwrm_send_message(bp, &req, sizeof(req), - HWRM_CMD_TIMEOUT); - bp->q_info[i].queue_profile = - QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY; + if (netif_running(bp->dev)) { + bnxt_close_nic(bp, false, false); + rc = bnxt_open_nic(bp, false, false); + if (rc) { + netdev_warn(bp->dev, "failed to open NIC, rc = %d\n", rc); + return rc; } } - - /* Now configure desired queues to lossless */ - req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS; - for (i = 0; i < bp->max_tc; i++) { - if (lltc_mask & (1 << i)) { - req.queue_id = cpu_to_le32(bp->q_info[i].queue_id); - hwrm_send_message(bp, &req, sizeof(req), - HWRM_CMD_TIMEOUT); - bp->q_info[i].queue_profile = - QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS; + if (bp->ieee_ets) { + int tc = netdev_get_num_tc(bp->dev); + + if (!tc) + tc = 1; + rc = bnxt_hwrm_queue_cos2bw_cfg(bp, bp->ieee_ets, tc); + if (rc) { + netdev_warn(bp->dev, "failed to config BW, rc = %d\n", rc); + return rc; + } + rc = bnxt_hwrm_queue_pri2cos_cfg(bp, bp->ieee_ets); + if (rc) { + netdev_warn(bp->dev, "failed to config prio, rc = %d\n", rc); + return rc; } } - if (netif_running(bp->dev)) - bnxt_tx_enable(bp); - return 0; } @@ -201,7 +235,7 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc) struct ieee_ets *my_ets = bp->ieee_ets; unsigned int tc_mask = 0, pri_mask = 0; u8 i, pri, lltc_count = 0; - bool need_q_recfg = false; + bool need_q_remap = false; int rc; if (!my_ets) @@ -221,21 +255,25 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc) if (lltc_count > bp->max_lltc) return -EINVAL; - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1); - req.flags = cpu_to_le32(pri_mask); - rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - if (rc) - return rc; - for (i = 0; i < bp->max_tc; i++) { if (tc_mask & (1 << i)) { - if (!BNXT_LLQ(bp->q_info[i].queue_profile)) - need_q_recfg = true; + u8 qidx = bp->tc_to_qidx[i]; + + if (!BNXT_LLQ(bp->q_info[qidx].queue_profile)) { + need_q_remap = true; + break; + } } } - if (need_q_recfg) - rc = bnxt_hwrm_queue_cfg(bp, tc_mask); + if (need_q_remap) + rc = bnxt_queue_remap(bp, tc_mask); + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1); + req.flags = cpu_to_le32(pri_mask); + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + return rc; return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c new file mode 100644 index 000000000000..94e208e9789f --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c @@ -0,0 
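
A standalone C sketch of the remap above: lossless TCs claim lossless-capable hardware queues first, then the remaining TCs take the leftover queue indexes in order, with a bitmap tracking what is taken. The profile table and mask below are made-up inputs chosen so every lossless TC finds a queue:

#include <stdio.h>

#define MAX_TC 8

int main(void)
{
	/* 1 = lossless-capable queue, 0 = lossy (illustrative profile) */
	int q_lossless[MAX_TC] = { 0, 0, 1, 1, 0, 0, 0, 0 };
	unsigned int lltc_mask = 0x05;	/* TCs 0 and 2 want lossless */
	unsigned int qmap = 0;
	int tc_to_qidx[MAX_TC];
	int i, j;

	/* pass 1: lossless TCs -> lossless queues */
	for (i = 0, j = 0; i < MAX_TC; ) {
		if (lltc_mask & (1 << i)) {
			if (q_lossless[j]) {
				tc_to_qidx[i] = j;
				qmap |= 1 << j;
				i++;
			}
			j++;
			continue;
		}
		i++;
	}

	/* pass 2: remaining TCs -> first free queues, in order */
	for (i = 0, j = 0; i < MAX_TC; i++) {
		if (lltc_mask & (1 << i))
			continue;
		while (qmap & (1 << j))		/* find_next_zero_bit() */
			j++;
		tc_to_qidx[i] = j;
		qmap |= 1 << j++;
	}

	for (i = 0; i < MAX_TC; i++)
		printf("tc%d -> qidx %d\n", i, tc_to_qidx[i]);
	return 0;
}
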
+1,124 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2017-2018 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ + +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/pci.h> +#include "bnxt_hsi.h" +#include <linux/net_dim.h> +#include "bnxt.h" +#include "bnxt_debugfs.h" + +static struct dentry *bnxt_debug_mnt; + +static ssize_t debugfs_dim_read(struct file *filep, + char __user *buffer, + size_t count, loff_t *ppos) +{ + struct net_dim *dim = filep->private_data; + int len; + char *buf; + + if (*ppos) + return 0; + if (!dim) + return -ENODEV; + buf = kasprintf(GFP_KERNEL, + "state = %d\n" \ + "profile_ix = %d\n" \ + "mode = %d\n" \ + "tune_state = %d\n" \ + "steps_right = %d\n" \ + "steps_left = %d\n" \ + "tired = %d\n", + dim->state, + dim->profile_ix, + dim->mode, + dim->tune_state, + dim->steps_right, + dim->steps_left, + dim->tired); + if (!buf) + return -ENOMEM; + if (count < strlen(buf)) { + kfree(buf); + return -ENOSPC; + } + len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); + kfree(buf); + return len; +} + +static const struct file_operations debugfs_dim_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = debugfs_dim_read, +}; + +static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx, + struct dentry *dd) +{ + static char qname[16]; + + snprintf(qname, 10, "%d", ring_idx); + return debugfs_create_file(qname, 0600, dd, + dim, &debugfs_dim_fops); +} + +void bnxt_debug_dev_init(struct bnxt *bp) +{ + const char *pname = pci_name(bp->pdev); + struct dentry *pdevf; + int i; + + bp->debugfs_pdev = debugfs_create_dir(pname, bnxt_debug_mnt); + if (bp->debugfs_pdev) { + pdevf = debugfs_create_dir("dim", bp->debugfs_pdev); + if (!pdevf) { + pr_err("failed to create debugfs entry %s/dim\n", + pname); + return; + } + bp->debugfs_dim = pdevf; + /* create files for each rx ring */ + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; + + if (cpr && bp->bnapi[i]->rx_ring) { + pdevf = debugfs_dim_ring_init(&cpr->dim, i, + bp->debugfs_dim); + if (!pdevf) + pr_err("failed to create debugfs entry %s/dim/%d\n", + pname, i); + } + } + } else { + pr_err("failed to create debugfs entry %s\n", pname); + } +} + +void bnxt_debug_dev_exit(struct bnxt *bp) +{ + if (bp) { + debugfs_remove_recursive(bp->debugfs_pdev); + bp->debugfs_pdev = NULL; + } +} + +void bnxt_debug_init(void) +{ + bnxt_debug_mnt = debugfs_create_dir("bnxt_en", NULL); + if (!bnxt_debug_mnt) + pr_err("failed to init bnxt_en debugfs\n"); +} + +void bnxt_debug_exit(void) +{ + debugfs_remove_recursive(bnxt_debug_mnt); +} diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h new file mode 100644 index 000000000000..d0bb4887acd0 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h @@ -0,0 +1,23 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2017-2018 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. 
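
The debugfs read above formats the whole record into one heap buffer and serves it through the file offset. A userspace sketch of that pattern, with GNU asprintf() standing in for kasprintf() and a hand-rolled analogue of simple_read_from_buffer(); a read at a non-zero offset past the data returns 0 (EOF), matching the early "if (*ppos) return 0;" in the driver:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static long read_from_buffer(char *dst, size_t count, long *ppos,
			     const char *src, size_t available)
{
	size_t pos = (size_t)*ppos;

	if (pos >= available)
		return 0;			/* EOF */
	if (count > available - pos)
		count = available - pos;
	memcpy(dst, src + pos, count);
	*ppos = pos + count;
	return (long)count;
}

int main(void)
{
	char *buf = NULL, out[64];
	long ppos = 0, n;

	/* kasprintf() analogue: format everything up front */
	if (asprintf(&buf, "state = %d\nprofile_ix = %d\n", 1, 3) < 0)
		return 1;

	while ((n = read_from_buffer(out, sizeof(out) - 1, &ppos,
				     buf, strlen(buf))) > 0) {
		out[n] = '\0';
		fputs(out, stdout);
	}
	free(buf);
	return 0;
}
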
+ */ + +#include "bnxt_hsi.h" +#include "bnxt.h" + +#ifdef CONFIG_DEBUG_FS +void bnxt_debug_init(void); +void bnxt_debug_exit(void); +void bnxt_debug_dev_init(struct bnxt *bp); +void bnxt_debug_dev_exit(struct bnxt *bp); +#else +static inline void bnxt_debug_init(void) {} +static inline void bnxt_debug_exit(void) {} +static inline void bnxt_debug_dev_init(struct bnxt *bp) {} +static inline void bnxt_debug_dev_exit(struct bnxt *bp) {} +#endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c index 408dd190331e..afa97c8bb081 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c @@ -21,11 +21,11 @@ void bnxt_dim_work(struct work_struct *work) struct bnxt_napi *bnapi = container_of(cpr, struct bnxt_napi, cp_ring); - struct net_dim_cq_moder cur_profile = net_dim_get_profile(dim->mode, - dim->profile_ix); + struct net_dim_cq_moder cur_moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); - cpr->rx_ring_coal.coal_ticks = cur_profile.usec; - cpr->rx_ring_coal.coal_bufs = cur_profile.pkts; + cpr->rx_ring_coal.coal_ticks = cur_moder.usec; + cpr->rx_ring_coal.coal_bufs = cur_moder.pkts; bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi); dim->state = NET_DIM_START_MEASURE; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8ba14ae00e8f..ad98b78f5aa1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -140,6 +140,19 @@ reset_coalesce: #define BNXT_RX_STATS_EXT_ENTRY(counter) \ { BNXT_RX_STATS_EXT_OFFSET(counter), __stringify(counter) } +enum { + RX_TOTAL_DISCARDS, + TX_TOTAL_DISCARDS, +}; + +static struct { + u64 counter; + char string[ETH_GSTRING_LEN]; +} bnxt_sw_func_stats[] = { + {0, "rx_total_discard_pkts"}, + {0, "tx_total_discard_pkts"}, +}; + static const struct { long offset; char string[ETH_GSTRING_LEN]; @@ -237,6 +250,7 @@ static const struct { BNXT_RX_STATS_EXT_ENTRY(resume_roce_pause_events), }; +#define BNXT_NUM_SW_FUNC_STATS ARRAY_SIZE(bnxt_sw_func_stats) #define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr) #define BNXT_NUM_PORT_STATS_EXT ARRAY_SIZE(bnxt_port_stats_ext_arr) @@ -244,6 +258,8 @@ static int bnxt_get_num_stats(struct bnxt *bp) { int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings; + num_stats += BNXT_NUM_SW_FUNC_STATS; + if (bp->flags & BNXT_FLAG_PORT_STATS) num_stats += BNXT_NUM_PORT_STATS; @@ -279,6 +295,9 @@ static void bnxt_get_ethtool_stats(struct net_device *dev, if (!bp->bnapi) return; + for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) + bnxt_sw_func_stats[i].counter = 0; + for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; @@ -288,7 +307,16 @@ static void bnxt_get_ethtool_stats(struct net_device *dev, for (k = 0; k < stat_fields; j++, k++) buf[j] = le64_to_cpu(hw_stats[k]); buf[j++] = cpr->rx_l4_csum_errors; + + bnxt_sw_func_stats[RX_TOTAL_DISCARDS].counter += + le64_to_cpu(cpr->hw_stats->rx_discard_pkts); + bnxt_sw_func_stats[TX_TOTAL_DISCARDS].counter += + le64_to_cpu(cpr->hw_stats->tx_discard_pkts); } + + for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++, j++) + buf[j] = bnxt_sw_func_stats[i].counter; + if (bp->flags & BNXT_FLAG_PORT_STATS) { __le64 *port_stats = (__le64 *)bp->hw_rx_port_stats; @@ -359,6 +387,11 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf) sprintf(buf, "[%d]: rx_l4_csum_errors", i); buf += 
ETH_GSTRING_LEN; } + for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) { + strcpy(buf, bnxt_sw_func_stats[i].string); + buf += ETH_GSTRING_LEN; + } + if (bp->flags & BNXT_FLAG_PORT_STATS) { for (i = 0; i < BNXT_NUM_PORT_STATS; i++) { strcpy(buf, bnxt_port_stats_arr[i].string); @@ -551,6 +584,8 @@ static int bnxt_set_channels(struct net_device *dev, * to renable */ } + } else { + rc = bnxt_reserve_rings(bp); } return rc; @@ -1785,6 +1820,11 @@ static int nvm_get_dir_info(struct net_device *dev, u32 *entries, u32 *length) static int bnxt_get_eeprom_len(struct net_device *dev) { + struct bnxt *bp = netdev_priv(dev); + + if (BNXT_VF(bp)) + return 0; + /* The -1 return value allows the entire 32-bit range of offsets to be * passed via the ethtool command-line utility. */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index f952963d594e..cc21d874eb6e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -462,13 +462,13 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs) vf_vnics = hw_resc->max_vnics - bp->nr_vnics; vf_vnics = min_t(u16, vf_vnics, vf_rx_rings); - req.min_rsscos_ctx = cpu_to_le16(1); - req.max_rsscos_ctx = cpu_to_le16(1); + req.min_rsscos_ctx = cpu_to_le16(BNXT_VF_MIN_RSS_CTX); + req.max_rsscos_ctx = cpu_to_le16(BNXT_VF_MAX_RSS_CTX); if (pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL) { req.min_cmpl_rings = cpu_to_le16(1); req.min_tx_rings = cpu_to_le16(1); req.min_rx_rings = cpu_to_le16(1); - req.min_l2_ctxs = cpu_to_le16(1); + req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MIN_L2_CTX); req.min_vnics = cpu_to_le16(1); req.min_stat_ctx = cpu_to_le16(1); req.min_hw_ring_grps = cpu_to_le16(1); @@ -483,7 +483,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs) req.min_cmpl_rings = cpu_to_le16(vf_cp_rings); req.min_tx_rings = cpu_to_le16(vf_tx_rings); req.min_rx_rings = cpu_to_le16(vf_rx_rings); - req.min_l2_ctxs = cpu_to_le16(4); + req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX); req.min_vnics = cpu_to_le16(vf_vnics); req.min_stat_ctx = cpu_to_le16(vf_stat_ctx); req.min_hw_ring_grps = cpu_to_le16(vf_ring_grps); @@ -491,7 +491,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs) req.max_cmpl_rings = cpu_to_le16(vf_cp_rings); req.max_tx_rings = cpu_to_le16(vf_tx_rings); req.max_rx_rings = cpu_to_le16(vf_rx_rings); - req.max_l2_ctxs = cpu_to_le16(4); + req.max_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX); req.max_vnics = cpu_to_le16(vf_vnics); req.max_stat_ctx = cpu_to_le16(vf_stat_ctx); req.max_hw_ring_grps = cpu_to_le16(vf_ring_grps); @@ -809,6 +809,9 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf, struct hwrm_fwd_resp_input req = {0}; struct hwrm_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr; + if (BNXT_FWD_RESP_SIZE_ERR(msg_size)) + return -EINVAL; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_RESP, -1, -1); /* Set the new target id */ @@ -845,6 +848,9 @@ static int bnxt_hwrm_fwd_err_resp(struct bnxt *bp, struct bnxt_vf_info *vf, struct hwrm_reject_fwd_resp_input req = {0}; struct hwrm_reject_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr; + if (BNXT_REJ_FWD_RESP_SIZE_ERR(msg_size)) + return -EINVAL; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_REJECT_FWD_RESP, -1, -1); /* Set the new target id */ req.target_id = cpu_to_le16(vf->fw_fid); @@ -877,6 +883,9 @@ static int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf, struct hwrm_exec_fwd_resp_input req = {0}; struct 
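
A small C sketch of the software-stat aggregation introduced above: the two global counters are zeroed on every ethtool dump and re-summed from the per-completion-ring hardware discard counters. The ring values below are made up:

#include <stdio.h>
#include <stdint.h>

enum { RX_TOTAL_DISCARDS, TX_TOTAL_DISCARDS, NUM_SW_STATS };

static struct {
	uint64_t counter;
	const char *string;
} sw_func_stats[NUM_SW_STATS] = {
	{ 0, "rx_total_discard_pkts" },
	{ 0, "tx_total_discard_pkts" },
};

int main(void)
{
	/* per-ring {rx_discard, tx_discard}, as in the hw stats block */
	uint64_t rings[3][2] = { { 4, 0 }, { 1, 2 }, { 0, 7 } };
	int i;

	for (i = 0; i < NUM_SW_STATS; i++)
		sw_func_stats[i].counter = 0;

	for (i = 0; i < 3; i++) {
		sw_func_stats[RX_TOTAL_DISCARDS].counter += rings[i][0];
		sw_func_stats[TX_TOTAL_DISCARDS].counter += rings[i][1];
	}

	for (i = 0; i < NUM_SW_STATS; i++)
		printf("%s: %llu\n", sw_func_stats[i].string,
		       (unsigned long long)sw_func_stats[i].counter);
	return 0;
}
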
hwrm_exec_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr; + if (BNXT_EXEC_FWD_RESP_SIZE_ERR(msg_size)) + return -EINVAL; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_EXEC_FWD_RESP, -1, -1); /* Set the new target id */ req.target_id = cpu_to_le16(vf->fw_fid); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h index d10f6f6c7860..e9b20cd19881 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h @@ -11,6 +11,23 @@ #ifndef BNXT_SRIOV_H #define BNXT_SRIOV_H +#define BNXT_FWD_RESP_SIZE_ERR(n) \ + ((offsetof(struct hwrm_fwd_resp_input, encap_resp) + n) > \ + sizeof(struct hwrm_fwd_resp_input)) + +#define BNXT_EXEC_FWD_RESP_SIZE_ERR(n) \ + ((offsetof(struct hwrm_exec_fwd_resp_input, encap_request) + n) >\ + offsetof(struct hwrm_exec_fwd_resp_input, encap_resp_target_id)) + +#define BNXT_REJ_FWD_RESP_SIZE_ERR(n) \ + ((offsetof(struct hwrm_reject_fwd_resp_input, encap_request) + n) >\ + offsetof(struct hwrm_reject_fwd_resp_input, encap_resp_target_id)) + +#define BNXT_VF_MIN_RSS_CTX 1 +#define BNXT_VF_MAX_RSS_CTX 1 +#define BNXT_VF_MIN_L2_CTX 1 +#define BNXT_VF_MAX_L2_CTX 4 + int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *); int bnxt_set_vf_mac(struct net_device *, int, u8 *); int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 1389ab5e05df..1f0e872d0667 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -113,10 +113,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, if (tx_avail != bp->tx_ring_size) *event &= ~BNXT_RX_EVENT; + *len = xdp.data_end - xdp.data; if (orig_data != xdp.data) { offset = xdp.data - xdp.data_hard_start; *data_ptr = xdp.data_hard_start + offset; - *len = xdp.data_end - xdp.data; } switch (act) { case XDP_PASS: diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0445f2c0c629..20c1681bb1af 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -652,7 +652,7 @@ static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring, pkts = ring->rx_max_coalesced_frames; if (ec->use_adaptive_rx_coalesce && !ring->dim.use_dim) { - moder = net_dim_get_def_profile(ring->dim.dim.mode); + moder = net_dim_get_def_rx_moderation(ring->dim.dim.mode); usecs = moder.usec; pkts = moder.pkts; } @@ -1925,7 +1925,7 @@ static void bcmgenet_dim_work(struct work_struct *work) struct bcmgenet_rx_ring *ring = container_of(ndim, struct bcmgenet_rx_ring, dim); struct net_dim_cq_moder cur_profile = - net_dim_get_profile(dim->mode, dim->profile_ix); + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts); dim->state = NET_DIM_START_MEASURE; @@ -2102,7 +2102,7 @@ static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring) /* If DIM was enabled, re-apply default parameters */ if (dim->use_dim) { - moder = net_dim_get_def_profile(dim->dim.mode); + moder = net_dim_get_def_rx_moderation(dim->dim.mode); usecs = moder.usec; pkts = moder.pkts; } diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index e8b290473ee2..929d485a3a2f 100644 --- 
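
A standalone C sketch of the BNXT_*_SIZE_ERR checks above: a forwarded VF message is rejected when its encapsulated payload, placed at offsetof() of the encapsulation field, would run past the fixed request layout. The struct below is a stand-in, not the real HWRM layout:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct fwd_resp_input {			/* illustrative stand-in */
	uint16_t target_id;
	uint16_t encap_resp_len;
	uint8_t  encap_resp[96];
};

#define FWD_RESP_SIZE_ERR(n) \
	((offsetof(struct fwd_resp_input, encap_resp) + (n)) > \
	 sizeof(struct fwd_resp_input))

int main(void)
{
	printf("msg_size 96 rejected? %d\n", FWD_RESP_SIZE_ERR(96)); /* 0 */
	printf("msg_size 97 rejected? %d\n", FWD_RESP_SIZE_ERR(97)); /* 1 */
	return 0;
}
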
a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -1245,7 +1245,7 @@ static void cn23xx_setup_reg_address(struct octeon_device *oct) CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num); } -static int cn23xx_sriov_config(struct octeon_device *oct) +int cn23xx_sriov_config(struct octeon_device *oct) { struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; u32 max_rings, total_rings, max_vfs, rings_per_vf; @@ -1269,8 +1269,8 @@ static int cn23xx_sriov_config(struct octeon_device *oct) break; } - if (max_rings <= num_present_cpus()) - num_pf_rings = 1; + if (oct->sriov_info.num_pf_rings) + num_pf_rings = oct->sriov_info.num_pf_rings; else num_pf_rings = num_present_cpus(); @@ -1497,3 +1497,57 @@ void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx, octeon_mbox_write(oct, &mbox_cmd); } } + +static void +cn23xx_get_vf_stats_callback(struct octeon_device *oct, + struct octeon_mbox_cmd *cmd, void *arg) +{ + struct oct_vf_stats_ctx *ctx = arg; + + memcpy(ctx->stats, cmd->data, sizeof(struct oct_vf_stats)); + atomic_set(&ctx->status, 1); +} + +int cn23xx_get_vf_stats(struct octeon_device *oct, int vfidx, + struct oct_vf_stats *stats) +{ + u32 timeout = HZ; // 1sec + struct octeon_mbox_cmd mbox_cmd; + struct oct_vf_stats_ctx ctx; + u32 count = 0, ret; + + if (!(oct->sriov_info.vf_drv_loaded_mask & (1ULL << vfidx))) + return -1; + + if (sizeof(struct oct_vf_stats) > sizeof(mbox_cmd.data)) + return -1; + + mbox_cmd.msg.u64 = 0; + mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST; + mbox_cmd.msg.s.resp_needed = 1; + mbox_cmd.msg.s.cmd = OCTEON_GET_VF_STATS; + mbox_cmd.msg.s.len = 1; + mbox_cmd.q_no = vfidx * oct->sriov_info.rings_per_vf; + mbox_cmd.recv_len = 0; + mbox_cmd.recv_status = 0; + mbox_cmd.fn = (octeon_mbox_callback_t)cn23xx_get_vf_stats_callback; + ctx.stats = stats; + atomic_set(&ctx.status, 0); + mbox_cmd.fn_arg = (void *)&ctx; + memset(mbox_cmd.data, 0, sizeof(mbox_cmd.data)); + octeon_mbox_write(oct, &mbox_cmd); + + do { + schedule_timeout_uninterruptible(1); + } while ((atomic_read(&ctx.status) == 0) && (count++ < timeout)); + + ret = atomic_read(&ctx.status); + if (ret == 0) { + octeon_mbox_cancel(oct, 0); + dev_err(&oct->pci_dev->dev, "Unable to get stats from VF-%d, timedout\n", + vfidx); + return -1; + } + + return 0; +} diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h index 2aba5247b6d8..e6f31d0d5c0b 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h @@ -43,6 +43,15 @@ struct octeon_cn23xx_pf { #define CN23XX_SLI_DEF_BP 0x40 +struct oct_vf_stats { + u64 rx_packets; + u64 tx_packets; + u64 rx_bytes; + u64 tx_bytes; + u64 broadcast; + u64 multicast; +}; + int setup_cn23xx_octeon_pf_device(struct octeon_device *oct); int validate_cn23xx_pf_config_info(struct octeon_device *oct, @@ -52,8 +61,13 @@ u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us); void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct); +int cn23xx_sriov_config(struct octeon_device *oct); + int cn23xx_fw_loaded(struct octeon_device *oct); void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx, u8 *mac); + +int cn23xx_get_vf_stats(struct octeon_device *oct, int ifidx, + struct oct_vf_stats *stats); #endif diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c 
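
A sketch of the completion-polling pattern in cn23xx_get_vf_stats() above: the caller spins, sleeping one tick per pass, on an atomic status flag that the mailbox callback sets, and cancels the request if the budget of passes runs out. The fake_callback trigger below is purely illustrative:

#include <stdio.h>
#include <stdatomic.h>

#define TIMEOUT_TICKS 100	/* stands in for the HZ (1 s) budget */

static atomic_int status;

static void fake_callback(void) { atomic_store(&status, 1); }

int main(void)
{
	unsigned int count = 0;

	atomic_store(&status, 0);
	/* mailbox request would be written here */
	while (atomic_load(&status) == 0 && count++ < TIMEOUT_TICKS) {
		/* schedule_timeout_uninterruptible(1) in the driver */
		if (count == 3)
			fake_callback();	/* response arrives */
	}

	if (atomic_load(&status) == 0) {
		fprintf(stderr, "timed out, cancelling mailbox request\n");
		return 1;
	}
	printf("VF stats received after %u passes\n", count);
	return 0;
}
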
b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 2a94eee943b2..6821afcdc365 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -29,6 +29,162 @@ /* OOM task polling interval */ #define LIO_OOM_POLL_INTERVAL_MS 250 +#define OCTNIC_MAX_SG MAX_SKB_FRAGS + +/** + * \brief Callback for getting interface configuration + * @param status status of request + * @param buf pointer to resp structure + */ +void lio_if_cfg_callback(struct octeon_device *oct, + u32 status __attribute__((unused)), void *buf) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; + struct liquidio_if_cfg_context *ctx; + struct liquidio_if_cfg_resp *resp; + + resp = (struct liquidio_if_cfg_resp *)sc->virtrptr; + ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; + + oct = lio_get_device(ctx->octeon_id); + if (resp->status) + dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n", + CVM_CAST64(resp->status)); + WRITE_ONCE(ctx->cond, 1); + + snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s", + resp->cfg_info.liquidio_firmware_version); + + /* This barrier is required to be sure that the response has been + * written fully before waking up the handler + */ + wmb(); + + wake_up_interruptible(&ctx->wc); +} + +/** + * \brief Delete gather lists + * @param lio per-network private data + */ +void lio_delete_glists(struct lio *lio) +{ + struct octnic_gather *g; + int i; + + kfree(lio->glist_lock); + lio->glist_lock = NULL; + + if (!lio->glist) + return; + + for (i = 0; i < lio->oct_dev->num_iqs; i++) { + do { + g = (struct octnic_gather *) + lio_list_delete_head(&lio->glist[i]); + kfree(g); + } while (g); + + if (lio->glists_virt_base && lio->glists_virt_base[i] && + lio->glists_dma_base && lio->glists_dma_base[i]) { + lio_dma_free(lio->oct_dev, + lio->glist_entry_size * lio->tx_qsize, + lio->glists_virt_base[i], + lio->glists_dma_base[i]); + } + } + + kfree(lio->glists_virt_base); + lio->glists_virt_base = NULL; + + kfree(lio->glists_dma_base); + lio->glists_dma_base = NULL; + + kfree(lio->glist); + lio->glist = NULL; +} + +/** + * \brief Setup gather lists + * @param lio per-network private data + */ +int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) +{ + struct octnic_gather *g; + int i, j; + + lio->glist_lock = + kcalloc(num_iqs, sizeof(*lio->glist_lock), GFP_KERNEL); + if (!lio->glist_lock) + return -ENOMEM; + + lio->glist = + kcalloc(num_iqs, sizeof(*lio->glist), GFP_KERNEL); + if (!lio->glist) { + kfree(lio->glist_lock); + lio->glist_lock = NULL; + return -ENOMEM; + } + + lio->glist_entry_size = + ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE); + + /* allocate memory to store virtual and dma base address of + * per glist consistent memory + */ + lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base), + GFP_KERNEL); + lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base), + GFP_KERNEL); + + if (!lio->glists_virt_base || !lio->glists_dma_base) { + lio_delete_glists(lio); + return -ENOMEM; + } + + for (i = 0; i < num_iqs; i++) { + int numa_node = dev_to_node(&oct->pci_dev->dev); + + spin_lock_init(&lio->glist_lock[i]); + + INIT_LIST_HEAD(&lio->glist[i]); + + lio->glists_virt_base[i] = + lio_dma_alloc(oct, + lio->glist_entry_size * lio->tx_qsize, + &lio->glists_dma_base[i]); + + if (!lio->glists_virt_base[i]) { + lio_delete_glists(lio); + return -ENOMEM; + } + + for (j = 0; j < lio->tx_qsize; j++) { + g = kzalloc_node(sizeof(*g), 
GFP_KERNEL, + numa_node); + if (!g) + g = kzalloc(sizeof(*g), GFP_KERNEL); + if (!g) + break; + + g->sg = lio->glists_virt_base[i] + + (j * lio->glist_entry_size); + + g->sg_dma_ptr = lio->glists_dma_base[i] + + (j * lio->glist_entry_size); + + list_add_tail(&g->list, &lio->glist[i]); + } + + if (j != lio->tx_qsize) { + lio_delete_glists(lio); + return -ENOMEM; + } + } + + return 0; +} + int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) { struct lio *lio = GET_LIO(netdev); @@ -880,8 +1036,8 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs) int num_ioq_vectors; int irqret, err; - oct->num_msix_irqs = num_ioqs; if (oct->msix_on) { + oct->num_msix_irqs = num_ioqs; if (OCTEON_CN23XX_PF(oct)) { num_interrupts = MAX_IOQ_INTERRUPTS_PER_PF + 1; @@ -1169,3 +1325,159 @@ int lio_wait_for_clean_oq(struct octeon_device *oct) return pending_pkts; } + +static void +octnet_nic_stats_callback(struct octeon_device *oct_dev, + u32 status, void *ptr) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; + struct oct_nic_stats_resp *resp = + (struct oct_nic_stats_resp *)sc->virtrptr; + struct oct_nic_stats_ctrl *ctrl = + (struct oct_nic_stats_ctrl *)sc->ctxptr; + struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire; + struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost; + struct nic_rx_stats *rstats = &oct_dev->link_stats.fromwire; + struct nic_tx_stats *tstats = &oct_dev->link_stats.fromhost; + + if (status != OCTEON_REQUEST_TIMEOUT && !resp->status) { + octeon_swap_8B_data((u64 *)&resp->stats, + (sizeof(struct oct_link_stats)) >> 3); + + /* RX link-level stats */ + rstats->total_rcvd = rsp_rstats->total_rcvd; + rstats->bytes_rcvd = rsp_rstats->bytes_rcvd; + rstats->total_bcst = rsp_rstats->total_bcst; + rstats->total_mcst = rsp_rstats->total_mcst; + rstats->runts = rsp_rstats->runts; + rstats->ctl_rcvd = rsp_rstats->ctl_rcvd; + /* Accounts for over/under-run of buffers */ + rstats->fifo_err = rsp_rstats->fifo_err; + rstats->dmac_drop = rsp_rstats->dmac_drop; + rstats->fcs_err = rsp_rstats->fcs_err; + rstats->jabber_err = rsp_rstats->jabber_err; + rstats->l2_err = rsp_rstats->l2_err; + rstats->frame_err = rsp_rstats->frame_err; + rstats->red_drops = rsp_rstats->red_drops; + + /* RX firmware stats */ + rstats->fw_total_rcvd = rsp_rstats->fw_total_rcvd; + rstats->fw_total_fwd = rsp_rstats->fw_total_fwd; + rstats->fw_total_mcast = rsp_rstats->fw_total_mcast; + rstats->fw_total_bcast = rsp_rstats->fw_total_bcast; + rstats->fw_err_pko = rsp_rstats->fw_err_pko; + rstats->fw_err_link = rsp_rstats->fw_err_link; + rstats->fw_err_drop = rsp_rstats->fw_err_drop; + rstats->fw_rx_vxlan = rsp_rstats->fw_rx_vxlan; + rstats->fw_rx_vxlan_err = rsp_rstats->fw_rx_vxlan_err; + + /* Number of packets that are LROed */ + rstats->fw_lro_pkts = rsp_rstats->fw_lro_pkts; + /* Number of octets that are LROed */ + rstats->fw_lro_octs = rsp_rstats->fw_lro_octs; + /* Number of LRO packets formed */ + rstats->fw_total_lro = rsp_rstats->fw_total_lro; + /* Number of times lRO of packet aborted */ + rstats->fw_lro_aborts = rsp_rstats->fw_lro_aborts; + rstats->fw_lro_aborts_port = rsp_rstats->fw_lro_aborts_port; + rstats->fw_lro_aborts_seq = rsp_rstats->fw_lro_aborts_seq; + rstats->fw_lro_aborts_tsval = rsp_rstats->fw_lro_aborts_tsval; + rstats->fw_lro_aborts_timer = rsp_rstats->fw_lro_aborts_timer; + /* intrmod: packet forward rate */ + rstats->fwd_rate = rsp_rstats->fwd_rate; + + /* TX link-level stats */ + tstats->total_pkts_sent = rsp_tstats->total_pkts_sent; + 
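
A small C sketch of the gather-list layout set up above: one contiguous block per tx queue (DMA-coherent in the driver), sliced into tx_qsize fixed-size entries whose CPU and device addresses are recorded in per-entry descriptors. The sizes and the pretend bus address are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define TX_QSIZE	4
#define ENTRY_SIZE	64	/* the ROUNDUP8(...) result in the driver */

struct gather {
	void *sg;		/* CPU address of this entry's slice */
	uintptr_t sg_dma_ptr;	/* device address of the same slice */
};

int main(void)
{
	unsigned char *virt_base = malloc((size_t)ENTRY_SIZE * TX_QSIZE);
	uintptr_t dma_base = 0x10000000;	/* pretend bus address */
	struct gather g[TX_QSIZE];
	int j;

	if (!virt_base)
		return 1;
	for (j = 0; j < TX_QSIZE; j++) {
		g[j].sg = virt_base + j * ENTRY_SIZE;
		g[j].sg_dma_ptr = dma_base + j * ENTRY_SIZE;
		printf("entry %d: cpu %p dma 0x%lx\n", j, g[j].sg,
		       (unsigned long)g[j].sg_dma_ptr);
	}
	free(virt_base);
	return 0;
}
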
tstats->total_bytes_sent = rsp_tstats->total_bytes_sent; + tstats->mcast_pkts_sent = rsp_tstats->mcast_pkts_sent; + tstats->bcast_pkts_sent = rsp_tstats->bcast_pkts_sent; + tstats->ctl_sent = rsp_tstats->ctl_sent; + /* Packets sent after one collision*/ + tstats->one_collision_sent = rsp_tstats->one_collision_sent; + /* Packets sent after multiple collision*/ + tstats->multi_collision_sent = rsp_tstats->multi_collision_sent; + /* Packets not sent due to max collisions */ + tstats->max_collision_fail = rsp_tstats->max_collision_fail; + /* Packets not sent due to max deferrals */ + tstats->max_deferral_fail = rsp_tstats->max_deferral_fail; + /* Accounts for over/under-run of buffers */ + tstats->fifo_err = rsp_tstats->fifo_err; + tstats->runts = rsp_tstats->runts; + /* Total number of collisions detected */ + tstats->total_collisions = rsp_tstats->total_collisions; + + /* firmware stats */ + tstats->fw_total_sent = rsp_tstats->fw_total_sent; + tstats->fw_total_fwd = rsp_tstats->fw_total_fwd; + tstats->fw_total_mcast_sent = rsp_tstats->fw_total_mcast_sent; + tstats->fw_total_bcast_sent = rsp_tstats->fw_total_bcast_sent; + tstats->fw_err_pko = rsp_tstats->fw_err_pko; + tstats->fw_err_pki = rsp_tstats->fw_err_pki; + tstats->fw_err_link = rsp_tstats->fw_err_link; + tstats->fw_err_drop = rsp_tstats->fw_err_drop; + tstats->fw_tso = rsp_tstats->fw_tso; + tstats->fw_tso_fwd = rsp_tstats->fw_tso_fwd; + tstats->fw_err_tso = rsp_tstats->fw_err_tso; + tstats->fw_tx_vxlan = rsp_tstats->fw_tx_vxlan; + + resp->status = 1; + } else { + resp->status = -1; + } + complete(&ctrl->complete); +} + +int octnet_get_link_stats(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct_dev = lio->oct_dev; + struct octeon_soft_command *sc; + struct oct_nic_stats_ctrl *ctrl; + struct oct_nic_stats_resp *resp; + int retval; + + /* Alloc soft command */ + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct_dev, + 0, + sizeof(struct oct_nic_stats_resp), + sizeof(struct octnic_ctrl_pkt)); + + if (!sc) + return -ENOMEM; + + resp = (struct oct_nic_stats_resp *)sc->virtrptr; + memset(resp, 0, sizeof(struct oct_nic_stats_resp)); + + ctrl = (struct oct_nic_stats_ctrl *)sc->ctxptr; + memset(ctrl, 0, sizeof(struct oct_nic_stats_ctrl)); + ctrl->netdev = netdev; + init_completion(&ctrl->complete); + + sc->iq_no = lio->linfo.txpciq[0].s.q_no; + + octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC, + OPCODE_NIC_PORT_STATS, 0, 0, 0); + + sc->callback = octnet_nic_stats_callback; + sc->callback_arg = sc; + sc->wait_time = 500; /*in milli seconds*/ + + retval = octeon_send_soft_command(oct_dev, sc); + if (retval == IQ_SEND_FAILED) { + octeon_free_soft_command(oct_dev, sc); + return -EINVAL; + } + + wait_for_completion_timeout(&ctrl->complete, msecs_to_jiffies(1000)); + + if (resp->status != 1) { + octeon_free_soft_command(oct_dev, sc); + + return -EINVAL; + } + + octeon_free_soft_command(oct_dev, sc); + + return 0; +} diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 550ac29682a5..a1d84304a608 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -32,7 +32,6 @@ #include "cn23xx_vf_device.h" static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs); -static int octnet_get_link_stats(struct net_device *netdev); struct oct_intrmod_context { int octeon_id; @@ -96,11 +95,9 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = { 
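
A userspace sketch of the octnet_get_link_stats() flow above: post a request, then block with a timeout until the response callback signals completion and flips the status to "good". pthread primitives stand in for the kernel completion API, and the responder thread plays the firmware:

#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int resp_status;			/* 1 = good response arrived */

static void *responder(void *arg)
{
	usleep(100 * 1000);		/* firmware "processing" */
	pthread_mutex_lock(&lock);
	resp_status = 1;		/* callback: complete(&ctrl->complete) */
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return arg;
}

int main(void)
{
	struct timespec deadline;
	pthread_t thr;
	int rc = 0;

	pthread_create(&thr, NULL, responder, NULL);

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1;		/* msecs_to_jiffies(1000) analogue */

	pthread_mutex_lock(&lock);
	while (!resp_status && rc == 0)
		rc = pthread_cond_timedwait(&cond, &lock, &deadline);
	pthread_mutex_unlock(&lock);

	pthread_join(thr, NULL);
	printf(resp_status == 1 ? "stats updated\n" : "timed out\n");
	return resp_status == 1 ? 0 : 1;
}
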
"tx_packets", "rx_bytes", "tx_bytes", - "rx_errors", /*jabber_err+l2_err+frame_err */ - "tx_errors", /*fw_err_pko+fw_err_link+fw_err_drop */ - "rx_dropped", /*st->fromwire.total_rcvd - st->fromwire.fw_total_rcvd + - *st->fromwire.dmac_drop + st->fromwire.fw_err_drop - */ + "rx_errors", + "tx_errors", + "rx_dropped", "tx_dropped", "tx_total_sent", @@ -115,14 +112,17 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = { "tx_tso_err", "tx_vxlan", + "tx_mcast", + "tx_bcast", + "mac_tx_total_pkts", "mac_tx_total_bytes", "mac_tx_mcast_pkts", "mac_tx_bcast_pkts", - "mac_tx_ctl_packets", /*oct->link_stats.fromhost.ctl_sent */ + "mac_tx_ctl_packets", "mac_tx_total_collisions", "mac_tx_one_collision", - "mac_tx_multi_collison", + "mac_tx_multi_collision", "mac_tx_max_collision_fail", "mac_tx_max_deferal_fail", "mac_tx_fifo_err", @@ -130,6 +130,8 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = { "rx_total_rcvd", "rx_total_fwd", + "rx_mcast", + "rx_bcast", "rx_jabber_err", "rx_l2_err", "rx_frame_err", @@ -170,17 +172,21 @@ static const char oct_vf_stats_strings[][ETH_GSTRING_LEN] = { "tx_packets", "rx_bytes", "tx_bytes", - "rx_errors", /* jabber_err + l2_err+frame_err */ - "tx_errors", /* fw_err_pko + fw_err_link+fw_err_drop */ - "rx_dropped", /* total_rcvd - fw_total_rcvd + dmac_drop + fw_err_drop */ + "rx_errors", + "tx_errors", + "rx_dropped", "tx_dropped", + "rx_mcast", + "tx_mcast", + "rx_bcast", + "tx_bcast", "link_state_changes", }; /* statistics of host tx queue */ static const char oct_iq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", /*oct->instr_queue[iq_no]->stats.tx_done*/ - "bytes", /*oct->instr_queue[iq_no]->stats.tx_tot_bytes*/ + "packets", + "bytes", "dropped", "iq_busy", "sgentry_sent", @@ -197,13 +203,9 @@ static const char oct_iq_stats_strings[][ETH_GSTRING_LEN] = { /* statistics of host rx queue */ static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", /*oct->droq[oq_no]->stats.rx_pkts_received */ - "bytes", /*oct->droq[oq_no]->stats.rx_bytes_received */ - "dropped", /*oct->droq[oq_no]->stats.rx_dropped+ - *oct->droq[oq_no]->stats.dropped_nodispatch+ - *oct->droq[oq_no]->stats.dropped_toomany+ - *oct->droq[oq_no]->stats.dropped_nomem - */ + "packets", + "bytes", + "dropped", "dropped_nomem", "dropped_toomany", "fw_dropped", @@ -359,7 +361,14 @@ lio_ethtool_get_channels(struct net_device *dev, rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx); tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx); } else if (OCTEON_CN23XX_PF(oct)) { - max_combined = lio->linfo.num_txpciq; + if (oct->sriov_info.sriov_enabled) { + max_combined = lio->linfo.num_txpciq; + } else { + struct octeon_config *conf23_pf = + CHIP_CONF(oct, cn23xx_pf); + + max_combined = CFG_GET_IQ_MAX_Q(conf23_pf); + } combined_count = oct->num_iqs; } else if (OCTEON_CN23XX_VF(oct)) { u64 reg_val = 0ULL; @@ -423,9 +432,15 @@ lio_irq_reallocate_irqs(struct octeon_device *oct, uint32_t num_ioqs) kfree(oct->irq_name_storage); oct->irq_name_storage = NULL; + + if (octeon_allocate_ioq_vector(oct, num_ioqs)) { + dev_err(&oct->pci_dev->dev, "OCTEON: ioq vector allocation failed\n"); + return -1; + } + if (octeon_setup_interrupt(oct, num_ioqs)) { dev_info(&oct->pci_dev->dev, "Setup interrupt failed\n"); - return 1; + return -1; } /* Enable Octeon device interrupts */ @@ -455,7 +470,16 @@ lio_ethtool_set_channels(struct net_device *dev, combined_count = channel->combined_count; if (OCTEON_CN23XX_PF(oct)) { - max_combined = channel->max_combined; + if (oct->sriov_info.sriov_enabled) { 
+ max_combined = lio->linfo.num_txpciq; + } else { + struct octeon_config *conf23_pf = + CHIP_CONF(oct, + cn23xx_pf); + + max_combined = + CFG_GET_IQ_MAX_Q(conf23_pf); + } } else if (OCTEON_CN23XX_VF(oct)) { u64 reg_val = 0ULL; u64 ctrl = CN23XX_VF_SLI_IQ_PKT_CONTROL64(0); @@ -483,7 +507,6 @@ lio_ethtool_set_channels(struct net_device *dev, if (lio_reset_queues(dev, combined_count)) return -EINVAL; - lio_irq_reallocate_irqs(oct, combined_count); if (stopped) dev->netdev_ops->ndo_open(dev); @@ -822,12 +845,120 @@ lio_ethtool_get_ringparam(struct net_device *netdev, ering->rx_jumbo_max_pending = 0; } +static int lio_23xx_reconfigure_queue_count(struct lio *lio) +{ + struct octeon_device *oct = lio->oct_dev; + struct liquidio_if_cfg_context *ctx; + u32 resp_size, ctx_size, data_size; + struct liquidio_if_cfg_resp *resp; + struct octeon_soft_command *sc; + union oct_nic_if_cfg if_cfg; + struct lio_version *vdata; + u32 ifidx_or_pfnum; + int retval; + int j; + + resp_size = sizeof(struct liquidio_if_cfg_resp); + ctx_size = sizeof(struct liquidio_if_cfg_context); + data_size = sizeof(struct lio_version); + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct, data_size, + resp_size, ctx_size); + if (!sc) { + dev_err(&oct->pci_dev->dev, "%s: Failed to allocate soft command\n", + __func__); + return -1; + } + + resp = (struct liquidio_if_cfg_resp *)sc->virtrptr; + ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; + vdata = (struct lio_version *)sc->virtdptr; + + vdata->major = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION); + vdata->minor = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION); + vdata->micro = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION); + + ifidx_or_pfnum = oct->pf_num; + WRITE_ONCE(ctx->cond, 0); + ctx->octeon_id = lio_get_device_id(oct); + init_waitqueue_head(&ctx->wc); + + if_cfg.u64 = 0; + if_cfg.s.num_iqueues = oct->sriov_info.num_pf_rings; + if_cfg.s.num_oqueues = oct->sriov_info.num_pf_rings; + if_cfg.s.base_queue = oct->sriov_info.pf_srn; + if_cfg.s.gmx_port_id = oct->pf_num; + + sc->iq_no = 0; + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, + OPCODE_NIC_QCOUNT_UPDATE, 0, + if_cfg.u64, 0); + sc->callback = lio_if_cfg_callback; + sc->callback_arg = sc; + sc->wait_time = LIO_IFCFG_WAIT_TIME; + + retval = octeon_send_soft_command(oct, sc); + if (retval == IQ_SEND_FAILED) { + dev_err(&oct->pci_dev->dev, + "iq/oq config failed status: %x\n", + retval); + goto qcount_update_fail; + } + + if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) { + dev_err(&oct->pci_dev->dev, "Wait interrupted\n"); + return -1; + } + + retval = resp->status; + if (retval) { + dev_err(&oct->pci_dev->dev, "iq/oq config failed\n"); + goto qcount_update_fail; + } + + octeon_swap_8B_data((u64 *)(&resp->cfg_info), + (sizeof(struct liquidio_if_cfg_info)) >> 3); + + lio->ifidx = ifidx_or_pfnum; + lio->linfo.num_rxpciq = hweight64(resp->cfg_info.iqmask); + lio->linfo.num_txpciq = hweight64(resp->cfg_info.iqmask); + for (j = 0; j < lio->linfo.num_rxpciq; j++) { + lio->linfo.rxpciq[j].u64 = + resp->cfg_info.linfo.rxpciq[j].u64; + } + + for (j = 0; j < lio->linfo.num_txpciq; j++) { + lio->linfo.txpciq[j].u64 = + resp->cfg_info.linfo.txpciq[j].u64; + } + + lio->linfo.hw_addr = resp->cfg_info.linfo.hw_addr; + lio->linfo.gmxport = resp->cfg_info.linfo.gmxport; + lio->linfo.link.u64 = resp->cfg_info.linfo.link.u64; + lio->txq = lio->linfo.txpciq[0].s.q_no; + lio->rxq = lio->linfo.rxpciq[0].s.q_no; + + octeon_free_soft_command(oct, sc); + dev_info(&oct->pci_dev->dev, "Queue count 
updated to %d\n", + lio->linfo.num_rxpciq); + + return 0; + +qcount_update_fail: + octeon_free_soft_command(oct, sc); + + return -1; +} + static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; + int i, queue_count_update = 0; struct napi_struct *napi, *n; - int i, update = 0; + int ret; + + schedule_timeout_uninterruptible(msecs_to_jiffies(100)); if (wait_for_pending_requests(oct)) dev_err(&oct->pci_dev->dev, "There were pending requests\n"); @@ -836,7 +967,7 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs) dev_err(&oct->pci_dev->dev, "IQ had pending instructions\n"); if (octeon_set_io_queues_off(oct)) { - dev_err(&oct->pci_dev->dev, "setting io queues off failed\n"); + dev_err(&oct->pci_dev->dev, "Setting io queues off failed\n"); return -1; } @@ -849,9 +980,40 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs) netif_napi_del(napi); if (num_qs != oct->num_iqs) { - netif_set_real_num_rx_queues(netdev, num_qs); - netif_set_real_num_tx_queues(netdev, num_qs); - update = 1; + ret = netif_set_real_num_rx_queues(netdev, num_qs); + if (ret) { + dev_err(&oct->pci_dev->dev, + "Setting real number rx failed\n"); + return ret; + } + + ret = netif_set_real_num_tx_queues(netdev, num_qs); + if (ret) { + dev_err(&oct->pci_dev->dev, + "Setting real number tx failed\n"); + return ret; + } + + /* The value of queue_count_update decides whether it is the + * queue count or the descriptor count that is being + * re-configured. + */ + queue_count_update = 1; + } + + /* Re-configuration of queues can happen in two scenarios, SRIOV enabled + * and SRIOV disabled. Few things like recreating queue zero, resetting + * glists and IRQs are required for both. For the latter, some more + * steps like updating sriov_info for the octeon device need to be done. + */ + if (queue_count_update) { + lio_delete_glists(lio); + + /* Delete mbox for PF which is SRIOV disabled because sriov_info + * will be now changed. + */ + if ((OCTEON_CN23XX_PF(oct)) && !oct->sriov_info.sriov_enabled) + oct->fn_list.free_mbox(oct); } for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) { @@ -866,24 +1028,91 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs) octeon_delete_instr_queue(oct, i); } + if (queue_count_update) { + /* For PF re-configure sriov related information */ + if ((OCTEON_CN23XX_PF(oct)) && + !oct->sriov_info.sriov_enabled) { + oct->sriov_info.num_pf_rings = num_qs; + if (cn23xx_sriov_config(oct)) { + dev_err(&oct->pci_dev->dev, + "Queue reset aborted: SRIOV config failed\n"); + return -1; + } + + num_qs = oct->sriov_info.num_pf_rings; + } + } + if (oct->fn_list.setup_device_regs(oct)) { dev_err(&oct->pci_dev->dev, "Failed to configure device registers\n"); return -1; } - if (liquidio_setup_io_queues(oct, 0, num_qs, num_qs)) { - dev_err(&oct->pci_dev->dev, "IO queues initialization failed\n"); - return -1; + /* The following are needed in case of queue count re-configuration and + * not for descriptor count re-configuration. 
+ if (queue_count_update) { + if (octeon_setup_instr_queues(oct)) + return -1; + + if (octeon_setup_output_queues(oct)) + return -1; + + /* Recreate the mbox for a PF that has SRIOV disabled */ + if (OCTEON_CN23XX_PF(oct) && !oct->sriov_info.sriov_enabled) { + if (oct->fn_list.setup_mbox(oct)) { + dev_err(&oct->pci_dev->dev, "Mailbox setup failed\n"); + return -1; + } + } + + /* Delete and recreate the IRQs whether the interface is SRIOV + * enabled or disabled. + */ + if (lio_irq_reallocate_irqs(oct, num_qs)) { + dev_err(&oct->pci_dev->dev, "IRQs could not be allocated\n"); + return -1; + } + + /* Enable the input and output queues for this Octeon device */ + if (oct->fn_list.enable_io_queues(oct)) { + dev_err(&oct->pci_dev->dev, "Failed to enable input/output queues\n"); + return -1; + } + + for (i = 0; i < oct->num_oqs; i++) + writel(oct->droq[i]->max_count, + oct->droq[i]->pkts_credit_reg); + + /* Inform the firmware of the new queue count. This is required + * for the firmware to allocate more queues than were configured + * at load time. + */ + if (OCTEON_CN23XX_PF(oct) && !oct->sriov_info.sriov_enabled) { + if (lio_23xx_reconfigure_queue_count(lio)) + return -1; + } } - /* Enable the input and output queues for this Octeon device */ - if (oct->fn_list.enable_io_queues(oct)) { - dev_err(&oct->pci_dev->dev, "Failed to enable input/output queues"); + /* Once the firmware is aware of the new value, the queues can be recreated */ + if (liquidio_setup_io_queues(oct, 0, num_qs, num_qs)) { + dev_err(&oct->pci_dev->dev, "I/O queues creation failed\n"); return -1; } - if (update && lio_send_queue_count_update(netdev, num_qs)) - return -1; + if (queue_count_update) { + if (lio_setup_glists(oct, lio, num_qs)) { + dev_err(&oct->pci_dev->dev, "Gather list allocation failed\n"); + return -1; + } + + /* Send the firmware the new number of queues + * if the interface is a VF or a PF that has SRIOV enabled.
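
This reset path is driven from lio_ethtool_set_channels(), which userspace reaches through the ETHTOOL_SCHANNELS ioctl, i.e. what "ethtool -L <dev> combined <n>" issues. A self-contained userspace sketch of that call, for illustration only (names hypothetical, error handling minimal):

	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>

	/* Ask the driver for @count combined channels on @ifname. */
	static int set_combined_channels(const char *ifname, __u32 count)
	{
		struct ethtool_channels ch = { .cmd = ETHTOOL_GCHANNELS };
		struct ifreq ifr;
		int ret = -1;
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0)
			return -1;
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_data = (char *)&ch;

		if (!ioctl(fd, SIOCETHTOOL, &ifr)) {	/* read current limits */
			ch.cmd = ETHTOOL_SCHANNELS;
			ch.combined_count = count;	/* must not exceed ch.max_combined */
			ret = ioctl(fd, SIOCETHTOOL, &ifr);
		}
		close(fd);
		return ret;
	}
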
+ */ + if (oct->sriov_info.sriov_enabled || OCTEON_CN23XX_VF(oct)) + if (lio_send_queue_count_update(netdev, num_qs)) + return -1; + } return 0; } @@ -928,7 +1157,7 @@ static int lio_ethtool_set_ringparam(struct net_device *netdev, CFG_SET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(oct), lio->ifidx, rx_count); - if (lio_reset_queues(netdev, lio->linfo.num_txpciq)) + if (lio_reset_queues(netdev, oct->num_iqs)) goto err_lio_reset_queues; if (stopped) @@ -1063,33 +1292,48 @@ lio_get_ethtool_stats(struct net_device *netdev, { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct_dev = lio->oct_dev; - struct net_device_stats *netstats = &netdev->stats; + struct rtnl_link_stats64 lstats; int i = 0, j; if (ifstate_check(lio, LIO_IFSTATE_RESETTING)) return; - netdev->netdev_ops->ndo_get_stats(netdev); - octnet_get_link_stats(netdev); - + netdev->netdev_ops->ndo_get_stats64(netdev, &lstats); /*sum of oct->droq[oq_no]->stats->rx_pkts_received */ - data[i++] = CVM_CAST64(netstats->rx_packets); + data[i++] = lstats.rx_packets; /*sum of oct->instr_queue[iq_no]->stats.tx_done */ - data[i++] = CVM_CAST64(netstats->tx_packets); + data[i++] = lstats.tx_packets; /*sum of oct->droq[oq_no]->stats->rx_bytes_received */ - data[i++] = CVM_CAST64(netstats->rx_bytes); + data[i++] = lstats.rx_bytes; /*sum of oct->instr_queue[iq_no]->stats.tx_tot_bytes */ - data[i++] = CVM_CAST64(netstats->tx_bytes); - data[i++] = CVM_CAST64(netstats->rx_errors); - data[i++] = CVM_CAST64(netstats->tx_errors); + data[i++] = lstats.tx_bytes; + data[i++] = lstats.rx_errors + + oct_dev->link_stats.fromwire.fcs_err + + oct_dev->link_stats.fromwire.jabber_err + + oct_dev->link_stats.fromwire.l2_err + + oct_dev->link_stats.fromwire.frame_err; + data[i++] = lstats.tx_errors; /*sum of oct->droq[oq_no]->stats->rx_dropped + *oct->droq[oq_no]->stats->dropped_nodispatch + *oct->droq[oq_no]->stats->dropped_toomany + *oct->droq[oq_no]->stats->dropped_nomem */ - data[i++] = CVM_CAST64(netstats->rx_dropped); + data[i++] = lstats.rx_dropped + + oct_dev->link_stats.fromwire.fifo_err + + oct_dev->link_stats.fromwire.dmac_drop + + oct_dev->link_stats.fromwire.red_drops + + oct_dev->link_stats.fromwire.fw_err_pko + + oct_dev->link_stats.fromwire.fw_err_link + + oct_dev->link_stats.fromwire.fw_err_drop; /*sum of oct->instr_queue[iq_no]->stats.tx_dropped */ - data[i++] = CVM_CAST64(netstats->tx_dropped); + data[i++] = lstats.tx_dropped + + oct_dev->link_stats.fromhost.max_collision_fail + + oct_dev->link_stats.fromhost.max_deferral_fail + + oct_dev->link_stats.fromhost.total_collisions + + oct_dev->link_stats.fromhost.fw_err_pko + + oct_dev->link_stats.fromhost.fw_err_link + + oct_dev->link_stats.fromhost.fw_err_drop + + oct_dev->link_stats.fromhost.fw_err_pki; /* firmware tx stats */ /*per_core_stats[cvmx_get_core_num()].link_stats[mdata->from_ifidx]. 
@@ -1124,6 +1368,10 @@ lio_get_ethtool_stats(struct net_device *netdev, */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_tx_vxlan); + /* Multicast packets sent by this port */ + data[i++] = oct_dev->link_stats.fromhost.fw_total_mcast_sent; + data[i++] = oct_dev->link_stats.fromhost.fw_total_bcast_sent; + /* mac tx statistics */ /*CVMX_BGXX_CMRX_TX_STAT5 */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.total_pkts_sent); @@ -1160,6 +1408,9 @@ lio_get_ethtool_stats(struct net_device *netdev, *fw_total_fwd */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.fw_total_fwd); + /* Multicast packets received on this port */ + data[i++] = oct_dev->link_stats.fromwire.fw_total_mcast; + data[i++] = oct_dev->link_stats.fromwire.fw_total_bcast; /*per_core_stats[core_id].link_stats[ifidx].fromwire.jabber_err */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.jabber_err); /*per_core_stats[core_id].link_stats[ifidx].fromwire.l2_err */ @@ -1328,7 +1579,7 @@ static void lio_vf_get_ethtool_stats(struct net_device *netdev, __attribute__((unused)), u64 *data) { - struct net_device_stats *netstats = &netdev->stats; + struct rtnl_link_stats64 lstats; struct lio *lio = GET_LIO(netdev); struct octeon_device *oct_dev = lio->oct_dev; int i = 0, j, vj; @@ -1336,25 +1587,31 @@ static void lio_vf_get_ethtool_stats(struct net_device *netdev, if (ifstate_check(lio, LIO_IFSTATE_RESETTING)) return; - netdev->netdev_ops->ndo_get_stats(netdev); + netdev->netdev_ops->ndo_get_stats64(netdev, &lstats); /* sum of oct->droq[oq_no]->stats->rx_pkts_received */ - data[i++] = CVM_CAST64(netstats->rx_packets); + data[i++] = lstats.rx_packets; /* sum of oct->instr_queue[iq_no]->stats.tx_done */ - data[i++] = CVM_CAST64(netstats->tx_packets); + data[i++] = lstats.tx_packets; /* sum of oct->droq[oq_no]->stats->rx_bytes_received */ - data[i++] = CVM_CAST64(netstats->rx_bytes); + data[i++] = lstats.rx_bytes; /* sum of oct->instr_queue[iq_no]->stats.tx_tot_bytes */ - data[i++] = CVM_CAST64(netstats->tx_bytes); - data[i++] = CVM_CAST64(netstats->rx_errors); - data[i++] = CVM_CAST64(netstats->tx_errors); + data[i++] = lstats.tx_bytes; + data[i++] = lstats.rx_errors; + data[i++] = lstats.tx_errors; /* sum of oct->droq[oq_no]->stats->rx_dropped + * oct->droq[oq_no]->stats->dropped_nodispatch + * oct->droq[oq_no]->stats->dropped_toomany + * oct->droq[oq_no]->stats->dropped_nomem */ - data[i++] = CVM_CAST64(netstats->rx_dropped); + data[i++] = lstats.rx_dropped; /* sum of oct->instr_queue[iq_no]->stats.tx_dropped */ - data[i++] = CVM_CAST64(netstats->tx_dropped); + data[i++] = lstats.tx_dropped; + + data[i++] = oct_dev->link_stats.fromwire.fw_total_mcast; + data[i++] = oct_dev->link_stats.fromhost.fw_total_mcast_sent; + data[i++] = oct_dev->link_stats.fromwire.fw_total_bcast; + data[i++] = oct_dev->link_stats.fromhost.fw_total_bcast_sent; + /* lio->link_changes */ data[i++] = CVM_CAST64(lio->link_changes); @@ -1765,161 +2022,6 @@ static int octnet_set_intrmod_cfg(struct lio *lio, return -EINTR; } -static void -octnet_nic_stats_callback(struct octeon_device *oct_dev, - u32 status, void *ptr) -{ - struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; - struct oct_nic_stats_resp *resp = - (struct oct_nic_stats_resp *)sc->virtrptr; - struct oct_nic_stats_ctrl *ctrl = - (struct oct_nic_stats_ctrl *)sc->ctxptr; - struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire; - struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost; - - struct nic_rx_stats *rstats = &oct_dev->link_stats.fromwire; - struct nic_tx_stats 
*tstats = &oct_dev->link_stats.fromhost; - - if ((status != OCTEON_REQUEST_TIMEOUT) && !resp->status) { - octeon_swap_8B_data((u64 *)&resp->stats, - (sizeof(struct oct_link_stats)) >> 3); - - /* RX link-level stats */ - rstats->total_rcvd = rsp_rstats->total_rcvd; - rstats->bytes_rcvd = rsp_rstats->bytes_rcvd; - rstats->total_bcst = rsp_rstats->total_bcst; - rstats->total_mcst = rsp_rstats->total_mcst; - rstats->runts = rsp_rstats->runts; - rstats->ctl_rcvd = rsp_rstats->ctl_rcvd; - /* Accounts for over/under-run of buffers */ - rstats->fifo_err = rsp_rstats->fifo_err; - rstats->dmac_drop = rsp_rstats->dmac_drop; - rstats->fcs_err = rsp_rstats->fcs_err; - rstats->jabber_err = rsp_rstats->jabber_err; - rstats->l2_err = rsp_rstats->l2_err; - rstats->frame_err = rsp_rstats->frame_err; - - /* RX firmware stats */ - rstats->fw_total_rcvd = rsp_rstats->fw_total_rcvd; - rstats->fw_total_fwd = rsp_rstats->fw_total_fwd; - rstats->fw_err_pko = rsp_rstats->fw_err_pko; - rstats->fw_err_link = rsp_rstats->fw_err_link; - rstats->fw_err_drop = rsp_rstats->fw_err_drop; - rstats->fw_rx_vxlan = rsp_rstats->fw_rx_vxlan; - rstats->fw_rx_vxlan_err = rsp_rstats->fw_rx_vxlan_err; - - /* Number of packets that are LROed */ - rstats->fw_lro_pkts = rsp_rstats->fw_lro_pkts; - /* Number of octets that are LROed */ - rstats->fw_lro_octs = rsp_rstats->fw_lro_octs; - /* Number of LRO packets formed */ - rstats->fw_total_lro = rsp_rstats->fw_total_lro; - /* Number of times lRO of packet aborted */ - rstats->fw_lro_aborts = rsp_rstats->fw_lro_aborts; - rstats->fw_lro_aborts_port = rsp_rstats->fw_lro_aborts_port; - rstats->fw_lro_aborts_seq = rsp_rstats->fw_lro_aborts_seq; - rstats->fw_lro_aborts_tsval = rsp_rstats->fw_lro_aborts_tsval; - rstats->fw_lro_aborts_timer = rsp_rstats->fw_lro_aborts_timer; - /* intrmod: packet forward rate */ - rstats->fwd_rate = rsp_rstats->fwd_rate; - - /* TX link-level stats */ - tstats->total_pkts_sent = rsp_tstats->total_pkts_sent; - tstats->total_bytes_sent = rsp_tstats->total_bytes_sent; - tstats->mcast_pkts_sent = rsp_tstats->mcast_pkts_sent; - tstats->bcast_pkts_sent = rsp_tstats->bcast_pkts_sent; - tstats->ctl_sent = rsp_tstats->ctl_sent; - /* Packets sent after one collision*/ - tstats->one_collision_sent = rsp_tstats->one_collision_sent; - /* Packets sent after multiple collision*/ - tstats->multi_collision_sent = rsp_tstats->multi_collision_sent; - /* Packets not sent due to max collisions */ - tstats->max_collision_fail = rsp_tstats->max_collision_fail; - /* Packets not sent due to max deferrals */ - tstats->max_deferral_fail = rsp_tstats->max_deferral_fail; - /* Accounts for over/under-run of buffers */ - tstats->fifo_err = rsp_tstats->fifo_err; - tstats->runts = rsp_tstats->runts; - /* Total number of collisions detected */ - tstats->total_collisions = rsp_tstats->total_collisions; - - /* firmware stats */ - tstats->fw_total_sent = rsp_tstats->fw_total_sent; - tstats->fw_total_fwd = rsp_tstats->fw_total_fwd; - tstats->fw_err_pko = rsp_tstats->fw_err_pko; - tstats->fw_err_pki = rsp_tstats->fw_err_pki; - tstats->fw_err_link = rsp_tstats->fw_err_link; - tstats->fw_err_drop = rsp_tstats->fw_err_drop; - tstats->fw_tso = rsp_tstats->fw_tso; - tstats->fw_tso_fwd = rsp_tstats->fw_tso_fwd; - tstats->fw_err_tso = rsp_tstats->fw_err_tso; - tstats->fw_tx_vxlan = rsp_tstats->fw_tx_vxlan; - - resp->status = 1; - } else { - resp->status = -1; - } - complete(&ctrl->complete); -} - -/* Configure interrupt moderation parameters */ -static int octnet_get_link_stats(struct net_device *netdev) -{ - 
struct lio *lio = GET_LIO(netdev); - struct octeon_device *oct_dev = lio->oct_dev; - - struct octeon_soft_command *sc; - struct oct_nic_stats_ctrl *ctrl; - struct oct_nic_stats_resp *resp; - - int retval; - - /* Alloc soft command */ - sc = (struct octeon_soft_command *) - octeon_alloc_soft_command(oct_dev, - 0, - sizeof(struct oct_nic_stats_resp), - sizeof(struct octnic_ctrl_pkt)); - - if (!sc) - return -ENOMEM; - - resp = (struct oct_nic_stats_resp *)sc->virtrptr; - memset(resp, 0, sizeof(struct oct_nic_stats_resp)); - - ctrl = (struct oct_nic_stats_ctrl *)sc->ctxptr; - memset(ctrl, 0, sizeof(struct oct_nic_stats_ctrl)); - ctrl->netdev = netdev; - init_completion(&ctrl->complete); - - sc->iq_no = lio->linfo.txpciq[0].s.q_no; - - octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC, - OPCODE_NIC_PORT_STATS, 0, 0, 0); - - sc->callback = octnet_nic_stats_callback; - sc->callback_arg = sc; - sc->wait_time = 500; /*in milli seconds*/ - - retval = octeon_send_soft_command(oct_dev, sc); - if (retval == IQ_SEND_FAILED) { - octeon_free_soft_command(oct_dev, sc); - return -EINVAL; - } - - wait_for_completion_timeout(&ctrl->complete, msecs_to_jiffies(1000)); - - if (resp->status != 1) { - octeon_free_soft_command(oct_dev, sc); - - return -EINVAL; - } - - octeon_free_soft_command(oct_dev, sc); - - return 0; -} - static int lio_get_intr_coalesce(struct net_device *netdev, struct ethtool_coalesce *intr_coal) { diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 603a144d3d9c..ee75048b3937 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -138,33 +138,10 @@ union tx_info { * by this structure in the NIC module. */ -#define OCTNIC_MAX_SG (MAX_SKB_FRAGS) - #define OCTNIC_GSO_MAX_HEADER_SIZE 128 #define OCTNIC_GSO_MAX_SIZE \ (CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE) -/** Structure of a node in list of gather components maintained by - * NIC driver for each network device. - */ -struct octnic_gather { - /** List manipulation. Next and prev pointers. */ - struct list_head list; - - /** Size of the gather component at sg in bytes. */ - int sg_size; - - /** Number of bytes that sg was adjusted to make it 8B-aligned. */ - int adjust; - - /** Gather component that can accommodate max sized fragment list - * received from the IP layer. - */ - struct octeon_sg_entry *sg; - - dma_addr_t sg_dma_ptr; -}; - struct handshake { struct completion init; struct completion started; @@ -520,7 +497,7 @@ static void liquidio_deinit_pci(void) */ static inline int check_txq_status(struct lio *lio) { - int numqs = lio->netdev->num_tx_queues; + int numqs = lio->netdev->real_num_tx_queues; int ret_val = 0; int q, iq; @@ -542,148 +519,6 @@ static inline int check_txq_status(struct lio *lio) } /** - * Remove the node at the head of the list. The list would be empty at - * the end of this call if there are no more nodes in the list. 
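
The comment opening here belongs to list_delete_head(), which this patch removes from lio_main.c (lio_vf_main.c carried a duplicate) and re-adds once as lio_list_delete_head() in octeon_network.h. Functionally it is a pop-front on a list_head; an equivalent sketch using the standard list primitives (an illustration, not the driver's code):

	#include <linux/list.h>

	/* Pop the first node off @root, or return NULL if @root is empty. */
	static inline struct list_head *demo_pop_front(struct list_head *root)
	{
		struct list_head *node = NULL;

		if (!list_empty(root)) {
			node = root->next;
			list_del(node);
		}
		return node;
	}
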
- */ -static inline struct list_head *list_delete_head(struct list_head *root) -{ - struct list_head *node; - - if ((root->prev == root) && (root->next == root)) - node = NULL; - else - node = root->next; - - if (node) - list_del(node); - - return node; -} - -/** - * \brief Delete gather lists - * @param lio per-network private data - */ -static void delete_glists(struct lio *lio) -{ - struct octnic_gather *g; - int i; - - kfree(lio->glist_lock); - lio->glist_lock = NULL; - - if (!lio->glist) - return; - - for (i = 0; i < lio->linfo.num_txpciq; i++) { - do { - g = (struct octnic_gather *) - list_delete_head(&lio->glist[i]); - if (g) - kfree(g); - } while (g); - - if (lio->glists_virt_base && lio->glists_virt_base[i] && - lio->glists_dma_base && lio->glists_dma_base[i]) { - lio_dma_free(lio->oct_dev, - lio->glist_entry_size * lio->tx_qsize, - lio->glists_virt_base[i], - lio->glists_dma_base[i]); - } - } - - kfree(lio->glists_virt_base); - lio->glists_virt_base = NULL; - - kfree(lio->glists_dma_base); - lio->glists_dma_base = NULL; - - kfree(lio->glist); - lio->glist = NULL; -} - -/** - * \brief Setup gather lists - * @param lio per-network private data - */ -static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) -{ - int i, j; - struct octnic_gather *g; - - lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock), - GFP_KERNEL); - if (!lio->glist_lock) - return -ENOMEM; - - lio->glist = kcalloc(num_iqs, sizeof(*lio->glist), - GFP_KERNEL); - if (!lio->glist) { - kfree(lio->glist_lock); - lio->glist_lock = NULL; - return -ENOMEM; - } - - lio->glist_entry_size = - ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE); - - /* allocate memory to store virtual and dma base address of - * per glist consistent memory - */ - lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base), - GFP_KERNEL); - lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base), - GFP_KERNEL); - - if (!lio->glists_virt_base || !lio->glists_dma_base) { - delete_glists(lio); - return -ENOMEM; - } - - for (i = 0; i < num_iqs; i++) { - int numa_node = dev_to_node(&oct->pci_dev->dev); - - spin_lock_init(&lio->glist_lock[i]); - - INIT_LIST_HEAD(&lio->glist[i]); - - lio->glists_virt_base[i] = - lio_dma_alloc(oct, - lio->glist_entry_size * lio->tx_qsize, - &lio->glists_dma_base[i]); - - if (!lio->glists_virt_base[i]) { - delete_glists(lio); - return -ENOMEM; - } - - for (j = 0; j < lio->tx_qsize; j++) { - g = kzalloc_node(sizeof(*g), GFP_KERNEL, - numa_node); - if (!g) - g = kzalloc(sizeof(*g), GFP_KERNEL); - if (!g) - break; - - g->sg = lio->glists_virt_base[i] + - (j * lio->glist_entry_size); - - g->sg_dma_ptr = lio->glists_dma_base[i] + - (j * lio->glist_entry_size); - - list_add_tail(&g->list, &lio->glist[i]); - } - - if (j != lio->tx_qsize) { - delete_glists(lio); - return -ENOMEM; - } - } - - return 0; -} - -/** * \brief Print link information * @param netdev network device */ @@ -1471,7 +1306,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) cleanup_rx_oom_poll_fn(netdev); - delete_glists(lio); + lio_delete_glists(lio); free_netdev(netdev); @@ -1686,7 +1521,7 @@ static void free_netsgbuf(void *buf) i++; } - iq = skb_iq(lio, skb); + iq = skb_iq(lio->oct_dev, skb); spin_lock(&lio->glist_lock[iq]); list_add_tail(&g->list, &lio->glist[iq]); spin_unlock(&lio->glist_lock[iq]); @@ -1729,7 +1564,7 @@ static void free_netsgbuf_with_resp(void *buf) i++; } - iq = skb_iq(lio, skb); + iq = skb_iq(lio->oct_dev, skb); 
spin_lock(&lio->glist_lock[iq]); list_add_tail(&g->list, &lio->glist[iq]); @@ -1942,39 +1777,6 @@ static int load_firmware(struct octeon_device *oct) } /** - * \brief Callback for getting interface configuration - * @param status status of request - * @param buf pointer to resp structure - */ -static void if_cfg_callback(struct octeon_device *oct, - u32 status __attribute__((unused)), - void *buf) -{ - struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; - struct liquidio_if_cfg_resp *resp; - struct liquidio_if_cfg_context *ctx; - - resp = (struct liquidio_if_cfg_resp *)sc->virtrptr; - ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; - - oct = lio_get_device(ctx->octeon_id); - if (resp->status) - dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: 0x%llx (0x%08x)\n", - CVM_CAST64(resp->status), status); - WRITE_ONCE(ctx->cond, 1); - - snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s", - resp->cfg_info.liquidio_firmware_version); - - /* This barrier is required to be sure that the response has been - * written fully before waking up the handler - */ - wmb(); - - wake_up_interruptible(&ctx->wc); -} - -/** * \brief Poll routine for checking transmit queue status * @param work work_struct data structure */ @@ -2049,11 +1851,6 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - /* Ready for link status updates */ - lio->intf_open = 1; - - netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n"); - if (OCTEON_CN23XX_PF(oct)) { if (!oct->msix_on) if (setup_tx_poll_fn(netdev)) @@ -2063,7 +1860,12 @@ static int liquidio_open(struct net_device *netdev) return -1; } - start_txqs(netdev); + netif_tx_start_all_queues(netdev); + + /* Ready for link status updates */ + lio->intf_open = 1; + + netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n"); /* tell Octeon to start forwarding packets to host */ send_rx_ctrl_cmd(lio, 1); @@ -2086,11 +1888,15 @@ static int liquidio_stop(struct net_device *netdev) ifstate_reset(lio, LIO_IFSTATE_RUNNING); - netif_tx_disable(netdev); + /* Stop any link updates */ + lio->intf_open = 0; + + stop_txqs(netdev); /* Inform that netif carrier is down */ netif_carrier_off(netdev); - lio->intf_open = 0; + netif_tx_disable(netdev); + lio->linfo.link.s.link_up = 0; lio->link_changes++; @@ -2252,14 +2058,11 @@ static int liquidio_set_mac(struct net_device *netdev, void *p) return 0; } -/** - * \brief Net device get_stats - * @param netdev network device - */ -static struct net_device_stats *liquidio_get_stats(struct net_device *netdev) +static void +liquidio_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *lstats) { struct lio *lio = GET_LIO(netdev); - struct net_device_stats *stats = &netdev->stats; struct octeon_device *oct; u64 pkts = 0, drop = 0, bytes = 0; struct oct_droq_stats *oq_stats; @@ -2269,7 +2072,7 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev) oct = lio->oct_dev; if (ifstate_check(lio, LIO_IFSTATE_RESETTING)) - return stats; + return; for (i = 0; i < oct->num_iqs; i++) { iq_no = lio->linfo.txpciq[i].s.q_no; @@ -2279,9 +2082,9 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev) bytes += iq_stats->tx_tot_bytes; } - stats->tx_packets = pkts; - stats->tx_bytes = bytes; - stats->tx_dropped = drop; + lstats->tx_packets = pkts; + lstats->tx_bytes = bytes; + lstats->tx_dropped = drop; pkts = 0; drop = 0; @@ -2298,11 +2101,34 @@ static struct net_device_stats 
*liquidio_get_stats(struct net_device *netdev) bytes += oq_stats->rx_bytes_received; } - stats->rx_bytes = bytes; - stats->rx_packets = pkts; - stats->rx_dropped = drop; - - return stats; + lstats->rx_bytes = bytes; + lstats->rx_packets = pkts; + lstats->rx_dropped = drop; + + octnet_get_link_stats(netdev); + lstats->multicast = oct->link_stats.fromwire.fw_total_mcast; + lstats->collisions = oct->link_stats.fromhost.total_collisions; + + /* detailed rx_errors: */ + lstats->rx_length_errors = oct->link_stats.fromwire.l2_err; + /* recved pkt with crc error */ + lstats->rx_crc_errors = oct->link_stats.fromwire.fcs_err; + /* recv'd frame alignment error */ + lstats->rx_frame_errors = oct->link_stats.fromwire.frame_err; + /* recv'r fifo overrun */ + lstats->rx_fifo_errors = oct->link_stats.fromwire.fifo_err; + + lstats->rx_errors = lstats->rx_length_errors + lstats->rx_crc_errors + + lstats->rx_frame_errors + lstats->rx_fifo_errors; + + /* detailed tx_errors */ + lstats->tx_aborted_errors = oct->link_stats.fromhost.fw_err_pko; + lstats->tx_carrier_errors = oct->link_stats.fromhost.fw_err_link; + lstats->tx_fifo_errors = oct->link_stats.fromhost.fifo_err; + + lstats->tx_errors = lstats->tx_aborted_errors + + lstats->tx_carrier_errors + + lstats->tx_fifo_errors; } /** @@ -2510,7 +2336,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) lio = GET_LIO(netdev); oct = lio->oct_dev; - q_idx = skb_iq(lio, skb); + q_idx = skb_iq(oct, skb); tag = q_idx; iq_no = lio->linfo.txpciq[q_idx].s.q_no; @@ -2585,6 +2411,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) if (dma_mapping_error(&oct->pci_dev->dev, dptr)) { dev_err(&oct->pci_dev->dev, "%s DMA mapping error 1\n", __func__); + stats->tx_dmamap_fail++; return NETDEV_TX_BUSY; } @@ -2602,7 +2429,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) spin_lock(&lio->glist_lock[q_idx]); g = (struct octnic_gather *) - list_delete_head(&lio->glist[q_idx]); + lio_list_delete_head(&lio->glist[q_idx]); spin_unlock(&lio->glist_lock[q_idx]); if (!g) { @@ -2624,6 +2451,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) if (dma_mapping_error(&oct->pci_dev->dev, g->sg[0].ptr[0])) { dev_err(&oct->pci_dev->dev, "%s DMA mapping error 2\n", __func__); + stats->tx_dmamap_fail++; return NETDEV_TX_BUSY; } add_sg_size(&g->sg[0], (skb->len - skb->data_len), 0); @@ -3324,11 +3152,36 @@ static const struct switchdev_ops lio_pf_switchdev_ops = { .switchdev_port_attr_get = lio_pf_switchdev_attr_get, }; +static int liquidio_get_vf_stats(struct net_device *netdev, int vfidx, + struct ifla_vf_stats *vf_stats) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct oct_vf_stats stats; + int ret; + + if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced) + return -EINVAL; + + memset(&stats, 0, sizeof(struct oct_vf_stats)); + ret = cn23xx_get_vf_stats(oct, vfidx, &stats); + if (!ret) { + vf_stats->rx_packets = stats.rx_packets; + vf_stats->tx_packets = stats.tx_packets; + vf_stats->rx_bytes = stats.rx_bytes; + vf_stats->tx_bytes = stats.tx_bytes; + vf_stats->broadcast = stats.broadcast; + vf_stats->multicast = stats.multicast; + } + + return ret; +} + static const struct net_device_ops lionetdevops = { .ndo_open = liquidio_open, .ndo_stop = liquidio_stop, .ndo_start_xmit = liquidio_xmit, - .ndo_get_stats = liquidio_get_stats, + .ndo_get_stats64 = liquidio_get_stats64, .ndo_set_mac_address = liquidio_set_mac, .ndo_set_rx_mode = 
liquidio_set_mcast_list, .ndo_tx_timeout = liquidio_tx_timeout, @@ -3346,6 +3199,7 @@ static const struct net_device_ops lionetdevops = { .ndo_get_vf_config = liquidio_get_vf_config, .ndo_set_vf_trust = liquidio_set_vf_trust, .ndo_set_vf_link_state = liquidio_set_vf_link_state, + .ndo_get_vf_stats = liquidio_get_vf_stats, }; /** \brief Entry point for the liquidio module @@ -3448,6 +3302,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) struct liquidio_if_cfg_resp *resp; struct octdev_props *props; int retval, num_iqueues, num_oqueues; + int max_num_queues = 0; union oct_nic_if_cfg if_cfg; unsigned int base_queue; unsigned int gmx_port_id; @@ -3528,9 +3383,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) OPCODE_NIC_IF_CFG, 0, if_cfg.u64, 0); - sc->callback = if_cfg_callback; + sc->callback = lio_if_cfg_callback; sc->callback_arg = sc; - sc->wait_time = 3000; + sc->wait_time = LIO_IFCFG_WAIT_TIME; retval = octeon_send_soft_command(octeon_dev, sc); if (retval == IQ_SEND_FAILED) { @@ -3584,11 +3439,20 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) resp->cfg_info.oqmask); goto setup_nic_dev_fail; } + + if (OCTEON_CN6XXX(octeon_dev)) { + max_num_queues = CFG_GET_IQ_MAX_Q(CHIP_CONF(octeon_dev, + cn6xxx)); + } else if (OCTEON_CN23XX_PF(octeon_dev)) { + max_num_queues = CFG_GET_IQ_MAX_Q(CHIP_CONF(octeon_dev, + cn23xx_pf)); + } + dev_dbg(&octeon_dev->pci_dev->dev, - "interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d\n", + "interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d max_num_queues: %d\n", i, resp->cfg_info.iqmask, resp->cfg_info.oqmask, - num_iqueues, num_oqueues); - netdev = alloc_etherdev_mq(LIO_SIZE, num_iqueues); + num_iqueues, num_oqueues, max_num_queues); + netdev = alloc_etherdev_mq(LIO_SIZE, max_num_queues); if (!netdev) { dev_err(&octeon_dev->pci_dev->dev, "Device allocation failed\n"); @@ -3603,6 +3467,20 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) netdev->netdev_ops = &lionetdevops; SWITCHDEV_SET_OPS(netdev, &lio_pf_switchdev_ops); + retval = netif_set_real_num_rx_queues(netdev, num_oqueues); + if (retval) { + dev_err(&octeon_dev->pci_dev->dev, + "setting real number rx failed\n"); + goto setup_nic_dev_fail; + } + + retval = netif_set_real_num_tx_queues(netdev, num_iqueues); + if (retval) { + dev_err(&octeon_dev->pci_dev->dev, + "setting real number tx failed\n"); + goto setup_nic_dev_fail; + } + lio = GET_LIO(netdev); memset(lio, 0, sizeof(struct lio)); @@ -3724,7 +3602,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq); lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq); - if (setup_glists(octeon_dev, lio, num_iqueues)) { + if (lio_setup_glists(octeon_dev, lio, num_iqueues)) { dev_err(&octeon_dev->pci_dev->dev, "Gather list allocation failed\n"); goto setup_nic_dev_fail; @@ -4223,7 +4101,9 @@ static int octeon_device_init(struct octeon_device *octeon_dev) } atomic_set(&octeon_dev->status, OCT_DEV_MBOX_SETUP_DONE); - if (octeon_allocate_ioq_vector(octeon_dev)) { + if (octeon_allocate_ioq_vector + (octeon_dev, + octeon_dev->sriov_info.num_pf_rings)) { dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n"); return 1; } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index f92dfa411de6..6295eeec795c 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ 
b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -69,30 +69,10 @@ union tx_info { } s; }; -#define OCTNIC_MAX_SG (MAX_SKB_FRAGS) - #define OCTNIC_GSO_MAX_HEADER_SIZE 128 #define OCTNIC_GSO_MAX_SIZE \ (CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE) -struct octnic_gather { - /* List manipulation. Next and prev pointers. */ - struct list_head list; - - /* Size of the gather component at sg in bytes. */ - int sg_size; - - /* Number of bytes that sg was adjusted to make it 8B-aligned. */ - int adjust; - - /* Gather component that can accommodate max sized fragment list - * received from the IP layer. - */ - struct octeon_sg_entry *sg; - - dma_addr_t sg_dma_ptr; -}; - static int liquidio_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent); static void liquidio_vf_remove(struct pci_dev *pdev); @@ -285,142 +265,6 @@ static struct pci_driver liquidio_vf_pci_driver = { }; /** - * Remove the node at the head of the list. The list would be empty at - * the end of this call if there are no more nodes in the list. - */ -static struct list_head *list_delete_head(struct list_head *root) -{ - struct list_head *node; - - if ((root->prev == root) && (root->next == root)) - node = NULL; - else - node = root->next; - - if (node) - list_del(node); - - return node; -} - -/** - * \brief Delete gather lists - * @param lio per-network private data - */ -static void delete_glists(struct lio *lio) -{ - struct octnic_gather *g; - int i; - - kfree(lio->glist_lock); - lio->glist_lock = NULL; - - if (!lio->glist) - return; - - for (i = 0; i < lio->linfo.num_txpciq; i++) { - do { - g = (struct octnic_gather *) - list_delete_head(&lio->glist[i]); - kfree(g); - } while (g); - - if (lio->glists_virt_base && lio->glists_virt_base[i] && - lio->glists_dma_base && lio->glists_dma_base[i]) { - lio_dma_free(lio->oct_dev, - lio->glist_entry_size * lio->tx_qsize, - lio->glists_virt_base[i], - lio->glists_dma_base[i]); - } - } - - kfree(lio->glists_virt_base); - lio->glists_virt_base = NULL; - - kfree(lio->glists_dma_base); - lio->glists_dma_base = NULL; - - kfree(lio->glist); - lio->glist = NULL; -} - -/** - * \brief Setup gather lists - * @param lio per-network private data - */ -static int setup_glists(struct lio *lio, int num_iqs) -{ - struct octnic_gather *g; - int i, j; - - lio->glist_lock = - kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL); - if (!lio->glist_lock) - return -ENOMEM; - - lio->glist = - kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL); - if (!lio->glist) { - kfree(lio->glist_lock); - lio->glist_lock = NULL; - return -ENOMEM; - } - - lio->glist_entry_size = - ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE); - - /* allocate memory to store virtual and dma base address of - * per glist consistent memory - */ - lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base), - GFP_KERNEL); - lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base), - GFP_KERNEL); - - if (!lio->glists_virt_base || !lio->glists_dma_base) { - delete_glists(lio); - return -ENOMEM; - } - - for (i = 0; i < num_iqs; i++) { - spin_lock_init(&lio->glist_lock[i]); - - INIT_LIST_HEAD(&lio->glist[i]); - - lio->glists_virt_base[i] = - lio_dma_alloc(lio->oct_dev, - lio->glist_entry_size * lio->tx_qsize, - &lio->glists_dma_base[i]); - - if (!lio->glists_virt_base[i]) { - delete_glists(lio); - return -ENOMEM; - } - - for (j = 0; j < lio->tx_qsize; j++) { - g = kzalloc(sizeof(*g), GFP_KERNEL); - if (!g) - break; - - g->sg = lio->glists_virt_base[i] + - (j * 
lio->glist_entry_size); - - g->sg_dma_ptr = lio->glists_dma_base[i] + - (j * lio->glist_entry_size); - - list_add_tail(&g->list, &lio->glist[i]); - } - - if (j != lio->tx_qsize) { - delete_glists(lio); - return -ENOMEM; - } - } - - return 0; -} - -/** * \brief Print link information * @param netdev network device */ @@ -856,7 +700,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) cleanup_link_status_change_wq(netdev); - delete_glists(lio); + lio_delete_glists(lio); free_netdev(netdev); @@ -1005,7 +849,7 @@ static void free_netsgbuf(void *buf) i++; } - iq = skb_iq(lio, skb); + iq = skb_iq(lio->oct_dev, skb); spin_lock(&lio->glist_lock[iq]); list_add_tail(&g->list, &lio->glist[iq]); @@ -1049,7 +893,7 @@ static void free_netsgbuf_with_resp(void *buf) i++; } - iq = skb_iq(lio, skb); + iq = skb_iq(lio->oct_dev, skb); spin_lock(&lio->glist_lock[iq]); list_add_tail(&g->list, &lio->glist[iq]); @@ -1059,38 +903,6 @@ static void free_netsgbuf_with_resp(void *buf) } /** - * \brief Callback for getting interface configuration - * @param status status of request - * @param buf pointer to resp structure - */ -static void if_cfg_callback(struct octeon_device *oct, - u32 status __attribute__((unused)), void *buf) -{ - struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; - struct liquidio_if_cfg_context *ctx; - struct liquidio_if_cfg_resp *resp; - - resp = (struct liquidio_if_cfg_resp *)sc->virtrptr; - ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; - - oct = lio_get_device(ctx->octeon_id); - if (resp->status) - dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n", - CVM_CAST64(resp->status)); - WRITE_ONCE(ctx->cond, 1); - - snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s", - resp->cfg_info.liquidio_firmware_version); - - /* This barrier is required to be sure that the response has been - * written fully before waking up the handler - */ - wmb(); - - wake_up_interruptible(&ctx->wc); -} - -/** * \brief Net device open for LiquidIO * @param netdev network device */ @@ -1336,24 +1148,21 @@ static int liquidio_set_mac(struct net_device *netdev, void *p) return 0; } -/** - * \brief Net device get_stats - * @param netdev network device - */ -static struct net_device_stats *liquidio_get_stats(struct net_device *netdev) +static void +liquidio_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *lstats) { struct lio *lio = GET_LIO(netdev); - struct net_device_stats *stats = &netdev->stats; + struct octeon_device *oct; u64 pkts = 0, drop = 0, bytes = 0; struct oct_droq_stats *oq_stats; struct oct_iq_stats *iq_stats; - struct octeon_device *oct; int i, iq_no, oq_no; oct = lio->oct_dev; if (ifstate_check(lio, LIO_IFSTATE_RESETTING)) - return stats; + return; for (i = 0; i < oct->num_iqs; i++) { iq_no = lio->linfo.txpciq[i].s.q_no; @@ -1363,9 +1172,9 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev) bytes += iq_stats->tx_tot_bytes; } - stats->tx_packets = pkts; - stats->tx_bytes = bytes; - stats->tx_dropped = drop; + lstats->tx_packets = pkts; + lstats->tx_bytes = bytes; + lstats->tx_dropped = drop; pkts = 0; drop = 0; @@ -1382,11 +1191,29 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev) bytes += oq_stats->rx_bytes_received; } - stats->rx_bytes = bytes; - stats->rx_packets = pkts; - stats->rx_dropped = drop; + lstats->rx_bytes = bytes; + lstats->rx_packets = pkts; + lstats->rx_dropped = drop; + + octnet_get_link_stats(netdev); + lstats->multicast = 
oct->link_stats.fromwire.fw_total_mcast; + + /* detailed rx_errors: */ + lstats->rx_length_errors = oct->link_stats.fromwire.l2_err; + /* recved pkt with crc error */ + lstats->rx_crc_errors = oct->link_stats.fromwire.fcs_err; + /* recv'd frame alignment error */ + lstats->rx_frame_errors = oct->link_stats.fromwire.frame_err; + + lstats->rx_errors = lstats->rx_length_errors + lstats->rx_crc_errors + + lstats->rx_frame_errors; + + /* detailed tx_errors */ + lstats->tx_aborted_errors = oct->link_stats.fromhost.fw_err_pko; + lstats->tx_carrier_errors = oct->link_stats.fromhost.fw_err_link; - return stats; + lstats->tx_errors = lstats->tx_aborted_errors + + lstats->tx_carrier_errors; } /** @@ -1580,7 +1407,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) lio = GET_LIO(netdev); oct = lio->oct_dev; - q_idx = skb_iq(lio, skb); + q_idx = skb_iq(lio->oct_dev, skb); tag = q_idx; iq_no = lio->linfo.txpciq[q_idx].s.q_no; @@ -1661,8 +1488,8 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) int i, frags; spin_lock(&lio->glist_lock[q_idx]); - g = (struct octnic_gather *)list_delete_head( - &lio->glist[q_idx]); + g = (struct octnic_gather *) + lio_list_delete_head(&lio->glist[q_idx]); spin_unlock(&lio->glist_lock[q_idx]); if (!g) { @@ -2034,7 +1861,7 @@ static const struct net_device_ops lionetdevops = { .ndo_open = liquidio_open, .ndo_stop = liquidio_stop, .ndo_start_xmit = liquidio_xmit, - .ndo_get_stats = liquidio_get_stats, + .ndo_get_stats64 = liquidio_get_stats64, .ndo_set_mac_address = liquidio_set_mac, .ndo_set_rx_mode = liquidio_set_mcast_list, .ndo_tx_timeout = liquidio_tx_timeout, @@ -2156,7 +1983,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) OPCODE_NIC_IF_CFG, 0, if_cfg.u64, 0); - sc->callback = if_cfg_callback; + sc->callback = lio_if_cfg_callback; sc->callback_arg = sc; sc->wait_time = 5000; @@ -2273,6 +2100,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) netdev->features = (lio->dev_capability & ~NETIF_F_LRO); netdev->hw_features = lio->dev_capability; + netdev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; /* MTU range: 68 - 16000 */ netdev->min_mtu = LIO_MIN_MTU_SIZE; @@ -2321,7 +2149,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq); lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq); - if (setup_glists(lio, num_iqueues)) { + if (lio_setup_glists(octeon_dev, lio, num_iqueues)) { dev_err(&octeon_dev->pci_dev->dev, "Gather list allocation failed\n"); goto setup_nic_dev_fail; @@ -2512,7 +2340,7 @@ static int octeon_device_init(struct octeon_device *oct) } atomic_set(&oct->status, OCT_DEV_MBOX_SETUP_DONE); - if (octeon_allocate_ioq_vector(oct)) { + if (octeon_allocate_ioq_vector(oct, oct->sriov_info.rings_per_vf)) { dev_err(&oct->pci_dev->dev, "ioq vector allocation failed\n"); return 1; } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index 2adafa366d3f..ddd7431579f4 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -201,13 +201,14 @@ lio_vf_rep_get_stats64(struct net_device *dev, { struct lio_vf_rep_desc *vf_rep = netdev_priv(dev); - stats64->tx_packets = vf_rep->stats.tx_packets; - stats64->tx_bytes = vf_rep->stats.tx_bytes; - stats64->tx_dropped = vf_rep->stats.tx_dropped; - - stats64->rx_packets = vf_rep->stats.rx_packets; - stats64->rx_bytes = vf_rep->stats.rx_bytes; - 
stats64->rx_dropped = vf_rep->stats.rx_dropped; + /* Swap tx and rx stats as VF rep is a switch port */ + stats64->tx_packets = vf_rep->stats.rx_packets; + stats64->tx_bytes = vf_rep->stats.rx_bytes; + stats64->tx_dropped = vf_rep->stats.rx_dropped; + + stats64->rx_packets = vf_rep->stats.tx_packets; + stats64->rx_bytes = vf_rep->stats.tx_bytes; + stats64->rx_dropped = vf_rep->stats.tx_dropped; } static int diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 75eea83c7cc6..026570499dcf 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -84,6 +84,7 @@ enum octeon_tag_type { #define OPCODE_NIC_IF_CFG 0x09 #define OPCODE_NIC_VF_DRV_NOTICE 0x0A #define OPCODE_NIC_INTRMOD_PARAMS 0x0B +#define OPCODE_NIC_QCOUNT_UPDATE 0x12 #define OPCODE_NIC_SET_TRUSTED_VF 0x13 #define OPCODE_NIC_SYNC_OCTEON_TIME 0x14 #define VF_DRV_LOADED 1 @@ -779,23 +780,32 @@ struct liquidio_if_cfg_info { /** Stats for each NIC port in RX direction. */ struct nic_rx_stats { /* link-level stats */ - u64 total_rcvd; - u64 bytes_rcvd; - u64 total_bcst; - u64 total_mcst; - u64 runts; - u64 ctl_rcvd; - u64 fifo_err; /* Accounts for over/under-run of buffers */ - u64 dmac_drop; - u64 fcs_err; - u64 jabber_err; - u64 l2_err; - u64 frame_err; + u64 total_rcvd; /* Received packets */ + u64 bytes_rcvd; /* Octets of received packets */ + u64 total_bcst; /* Number of non-dropped L2 broadcast packets */ + u64 total_mcst; /* Number of non-dropped L2 multicast packets */ + u64 runts; /* Packets shorter than allowed */ + u64 ctl_rcvd; /* Received PAUSE packets */ + u64 fifo_err; /* Packets dropped due to RX FIFO full */ + u64 dmac_drop; /* Packets dropped by the DMAC filter */ + u64 fcs_err; /* Sum of fragment, overrun, and FCS errors */ + u64 jabber_err; /* Packets larger than allowed */ + u64 l2_err; /* Sum of DMA, parity, PCAM access, no memory, + * buffer overflow, malformed L2 header or + * length, oversize errors + **/ + u64 frame_err; /* Sum of IPv4 and L4 checksum errors */ + u64 red_drops; /* Packets dropped by RED due to buffer + * exhaustion + **/ /* firmware stats */ u64 fw_total_rcvd; u64 fw_total_fwd; u64 fw_total_fwd_bytes; + u64 fw_total_mcast; + u64 fw_total_bcast; + u64 fw_err_pko; u64 fw_err_link; u64 fw_err_drop; @@ -806,11 +816,11 @@ struct nic_rx_stats { u64 fw_lro_pkts; /* Number of packets that are LROed */ u64 fw_lro_octs; /* Number of octets that are LROed */ u64 fw_total_lro; /* Number of LRO packets formed */ - u64 fw_lro_aborts; /* Number of times lRO of packet aborted */ + u64 fw_lro_aborts; /* Number of times LRO of packet aborted */ u64 fw_lro_aborts_port; u64 fw_lro_aborts_seq; u64 fw_lro_aborts_tsval; - u64 fw_lro_aborts_timer; + u64 fw_lro_aborts_timer; /* Timer setting error */ /* intrmod: packet forward rate */ u64 fwd_rate; }; @@ -818,23 +828,42 @@ struct nic_rx_stats { /** Stats for each NIC port in RX direction. 
*/ struct nic_tx_stats { /* link-level stats */ - u64 total_pkts_sent; - u64 total_bytes_sent; - u64 mcast_pkts_sent; - u64 bcast_pkts_sent; - u64 ctl_sent; - u64 one_collision_sent; /* Packets sent after one collision*/ - u64 multi_collision_sent; /* Packets sent after multiple collision*/ - u64 max_collision_fail; /* Packets not sent due to max collisions */ - u64 max_deferral_fail; /* Packets not sent due to max deferrals */ - u64 fifo_err; /* Accounts for over/under-run of buffers */ - u64 runts; - u64 total_collisions; /* Total number of collisions detected */ + u64 total_pkts_sent; /* Total frames sent on the interface */ + u64 total_bytes_sent; /* Total octets sent on the interface */ + u64 mcast_pkts_sent; /* Packets sent to the multicast DMAC */ + u64 bcast_pkts_sent; /* Packets sent to a broadcast DMAC */ + u64 ctl_sent; /* Control/PAUSE packets sent */ + u64 one_collision_sent; /* Packets sent that experienced a + * single collision before successful + * transmission + **/ + u64 multi_collision_sent; /* Packets sent that experienced + * multiple collisions before successful + * transmission + **/ + u64 max_collision_fail; /* Packets dropped due to excessive + * collisions + **/ + u64 max_deferral_fail; /* Packets not sent due to max + * deferrals + **/ + u64 fifo_err; /* Packets sent that experienced a + * transmit underflow and were + * truncated + **/ + u64 runts; /* Packets sent with an octet count + * less than 64 + **/ + u64 total_collisions; /* Packets dropped due to excessive + * collisions + **/ /* firmware stats */ u64 fw_total_sent; u64 fw_total_fwd; u64 fw_total_fwd_bytes; + u64 fw_total_mcast_sent; + u64 fw_total_bcast_sent; u64 fw_err_pko; u64 fw_err_link; u64 fw_err_drop; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index f38abf626412..f878a552fef3 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -824,23 +824,18 @@ int octeon_deregister_device(struct octeon_device *oct) } int -octeon_allocate_ioq_vector(struct octeon_device *oct) +octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs) { - int i, num_ioqs = 0; struct octeon_ioq_vector *ioq_vector; int cpu_num; int size; - - if (OCTEON_CN23XX_PF(oct)) - num_ioqs = oct->sriov_info.num_pf_rings; - else if (OCTEON_CN23XX_VF(oct)) - num_ioqs = oct->sriov_info.rings_per_vf; + int i; size = sizeof(struct octeon_ioq_vector) * num_ioqs; oct->ioq_vector = vzalloc(size); if (!oct->ioq_vector) - return 1; + return -1; for (i = 0; i < num_ioqs; i++) { ioq_vector = &oct->ioq_vector[i]; ioq_vector->oct_dev = oct; @@ -856,6 +851,7 @@ octeon_allocate_ioq_vector(struct octeon_device *oct) else ioq_vector->ioq_num = i; } + return 0; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 91937cc5c1d7..9430c0a0bb8c 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -867,7 +867,7 @@ void *oct_get_config_info(struct octeon_device *oct, u16 card_type); struct octeon_config *octeon_get_conf(struct octeon_device *oct); void octeon_free_ioq_vector(struct octeon_device *oct); -int octeon_allocate_ioq_vector(struct octeon_device *oct); +int octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs); void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq); /* LiquidIO driver private flags */ diff --git
a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index 81c987682941..5fed7b63223e 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -62,8 +62,8 @@ struct oct_iq_stats { u64 tx_tot_bytes;/**< Total count of bytes sent to network. */ u64 tx_gso; /* count of tso */ u64 tx_vxlan; /* tunnel */ - u64 tx_dmamap_fail; - u64 tx_restart; + u64 tx_dmamap_fail; /* Number of times dma mapping failed */ + u64 tx_restart; /* Number of times this queue restarted */ }; #define OCT_IQ_STATS_SIZE (sizeof(struct oct_iq_stats)) diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c index 28e74ee23ff8..021d99cd1665 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c @@ -24,6 +24,7 @@ #include "octeon_device.h" #include "octeon_main.h" #include "octeon_mailbox.h" +#include "cn23xx_pf_device.h" /** * octeon_mbox_read: @@ -205,6 +206,26 @@ int octeon_mbox_write(struct octeon_device *oct, return ret; } +static void get_vf_stats(struct octeon_device *oct, + struct oct_vf_stats *stats) +{ + int i; + + for (i = 0; i < oct->num_iqs; i++) { + if (!oct->instr_queue[i]) + continue; + stats->tx_packets += oct->instr_queue[i]->stats.tx_done; + stats->tx_bytes += oct->instr_queue[i]->stats.tx_tot_bytes; + } + + for (i = 0; i < oct->num_oqs; i++) { + if (!oct->droq[i]) + continue; + stats->rx_packets += oct->droq[i]->stats.rx_pkts_received; + stats->rx_bytes += oct->droq[i]->stats.rx_bytes_received; + } +} + /** * octeon_mbox_process_cmd: * @mbox: Pointer mailbox @@ -250,6 +271,15 @@ static int octeon_mbox_process_cmd(struct octeon_mbox *mbox, mbox_cmd->msg.s.params); break; + case OCTEON_GET_VF_STATS: + dev_dbg(&oct->pci_dev->dev, "Got VF stats request. 
Sending data back\n"); + mbox_cmd->msg.s.type = OCTEON_MBOX_RESPONSE; + mbox_cmd->msg.s.resp_needed = 1; + mbox_cmd->msg.s.len = 1 + + sizeof(struct oct_vf_stats) / sizeof(u64); + get_vf_stats(oct, (struct oct_vf_stats *)mbox_cmd->data); + octeon_mbox_write(oct, mbox_cmd); + break; default: break; } @@ -322,3 +352,25 @@ int octeon_mbox_process_message(struct octeon_mbox *mbox) return 0; } + +int octeon_mbox_cancel(struct octeon_device *oct, int q_no) +{ + struct octeon_mbox *mbox = oct->mbox[q_no]; + struct octeon_mbox_cmd *mbox_cmd; + unsigned long flags = 0; + + spin_lock_irqsave(&mbox->lock, flags); + mbox_cmd = &mbox->mbox_resp; + + if (!(mbox->state & OCTEON_MBOX_STATE_RESPONSE_PENDING)) { + spin_unlock_irqrestore(&mbox->lock, flags); + return 1; + } + + mbox->state = OCTEON_MBOX_STATE_IDLE; + memset(mbox_cmd, 0, sizeof(*mbox_cmd)); + writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg); + spin_unlock_irqrestore(&mbox->lock, flags); + + return 0; +} diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h index 1def22afeff1..d92bd7e16477 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h @@ -25,6 +25,7 @@ #define OCTEON_VF_ACTIVE 0x1 #define OCTEON_VF_FLR_REQUEST 0x2 #define OCTEON_PF_CHANGED_VF_MACADDR 0x4 +#define OCTEON_GET_VF_STATS 0x8 /*Macro for Read acknowledgement*/ #define OCTEON_PFVFACK 0xffffffffffffffffULL @@ -107,9 +108,15 @@ struct octeon_mbox { }; +struct oct_vf_stats_ctx { + atomic_t status; + struct oct_vf_stats *stats; +}; + int octeon_mbox_read(struct octeon_mbox *mbox); int octeon_mbox_write(struct octeon_device *oct, struct octeon_mbox_cmd *mbox_cmd); int octeon_mbox_process_message(struct octeon_mbox *mbox); +int octeon_mbox_cancel(struct octeon_device *oct, int q_no); #endif diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index 4069710796a8..8571f11e3c8f 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -47,6 +47,29 @@ struct liquidio_if_cfg_resp { u64 status; }; +#define LIO_IFCFG_WAIT_TIME 3000 /* In milliseconds */ + +/* Structure of a node in the list of gather components maintained by + * the NIC driver for each network device. + */ +struct octnic_gather { + /* List manipulation. Next and prev pointers. */ + struct list_head list; + + /* Size of the gather component at sg in bytes. */ + int sg_size; + + /* Number of bytes that sg was adjusted to make it 8B-aligned. */ + int adjust; + + /* Gather component that can accommodate a max-sized fragment list + * received from the IP layer. 
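
Each octnic_gather node owns one scatter list sized for a worst-case fragment chain from the stack (OCTNIC_MAX_SG, i.e. MAX_SKB_FRAGS entries). As a generic illustration of what such a component carries, here is a sketch of DMA-mapping an skb's fragments (hypothetical helper; the driver's real path lives in liquidio_xmit()):

	#include <linux/skbuff.h>
	#include <linux/dma-mapping.h>

	static int demo_map_frags(struct device *dev, struct sk_buff *skb,
				  dma_addr_t *dma, int max_frags)
	{
		int i, frags = skb_shinfo(skb)->nr_frags;

		if (frags > max_frags)
			return -EINVAL;

		for (i = 0; i < frags; i++) {
			const skb_frag_t *f = &skb_shinfo(skb)->frags[i];

			dma[i] = skb_frag_dma_map(dev, f, 0, skb_frag_size(f),
						  DMA_TO_DEVICE);
			if (dma_mapping_error(dev, dma[i]))
				return -ENOMEM;	/* caller unwinds earlier mappings */
		}
		return 0;
	}
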
+ */ + struct octeon_sg_entry *sg; + + dma_addr_t sg_dma_ptr; +}; + struct oct_nic_stats_resp { u64 rh; struct oct_link_stats stats; @@ -190,6 +213,8 @@ irqreturn_t liquidio_msix_intr_handler(int irq __attribute__((unused)), int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs); +int octnet_get_link_stats(struct net_device *netdev); + int lio_wait_for_clean_oq(struct octeon_device *oct); /** * \brief Register ethtool operations @@ -197,6 +222,14 @@ int lio_wait_for_clean_oq(struct octeon_device *oct); */ void liquidio_set_ethtool_ops(struct net_device *netdev); +void lio_if_cfg_callback(struct octeon_device *oct, + u32 status __attribute__((unused)), + void *buf); + +void lio_delete_glists(struct lio *lio); + +int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_qs); + /** * \brief Net device change_mtu * @param netdev network device @@ -515,7 +548,7 @@ static inline void stop_txqs(struct net_device *netdev) { int i; - for (i = 0; i < netdev->num_tx_queues; i++) + for (i = 0; i < netdev->real_num_tx_queues; i++) netif_stop_subqueue(netdev, i); } @@ -528,7 +561,7 @@ static inline void wake_txqs(struct net_device *netdev) struct lio *lio = GET_LIO(netdev); int i, qno; - for (i = 0; i < netdev->num_tx_queues; i++) { + for (i = 0; i < netdev->real_num_tx_queues; i++) { qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs].s.q_no; if (__netif_subqueue_stopped(netdev, i)) { @@ -549,14 +582,33 @@ static inline void start_txqs(struct net_device *netdev) int i; if (lio->linfo.link.s.link_up) { - for (i = 0; i < netdev->num_tx_queues; i++) + for (i = 0; i < netdev->real_num_tx_queues; i++) netif_start_subqueue(netdev, i); } } -static inline int skb_iq(struct lio *lio, struct sk_buff *skb) +static inline int skb_iq(struct octeon_device *oct, struct sk_buff *skb) { - return skb->queue_mapping % lio->linfo.num_txpciq; + return skb->queue_mapping % oct->num_iqs; +} + +/** + * Remove the node at the head of the list. The list would be empty at + * the end of this call if there are no more nodes in the list. 
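
The txq helpers above now bound their loops by real_num_tx_queues, the count currently in use (set via netif_set_real_num_tx_queues()), rather than num_tx_queues, the allocation-time maximum from alloc_etherdev_mq(). A probe-time sketch of that relationship (hypothetical driver, illustration only):

	/* Allocate for the hardware maximum, then trim to what is used. */
	static struct net_device *demo_alloc_netdev(unsigned int max_qs,
						    unsigned int used_qs)
	{
		struct net_device *netdev = alloc_etherdev_mq(0, max_qs);

		if (!netdev)
			return NULL;

		if (netif_set_real_num_tx_queues(netdev, used_qs) ||
		    netif_set_real_num_rx_queues(netdev, used_qs)) {
			free_netdev(netdev);
			return NULL;
		}
		return netdev;
	}
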
+ */ +static inline struct list_head *lio_list_delete_head(struct list_head *root) +{ + struct list_head *node; + + if (root->prev == root && root->next == root) + node = NULL; + else + node = root->next; + + if (node) + list_del(node); + + return node; } #endif diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 707db3304396..7135db45927e 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -538,9 +538,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, action = bpf_prog_run_xdp(prog, &xdp); rcu_read_unlock(); + len = xdp.data_end - xdp.data; /* Check if XDP program has changed headers */ if (orig_data != xdp.data) { - len = xdp.data_end - xdp.data; offset = orig_data - xdp.data; dma_addr -= offset; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index db92f1858060..aae980205ece 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -64,8 +64,7 @@ static int set_tcb_field(struct adapter *adap, struct filter_entry *f, if (!skb) return -ENOMEM; - req = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*req)); - memset(req, 0, sizeof(*req)); + req = (struct cpl_set_tcb_field *)__skb_put_zero(skb, sizeof(*req)); INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, ftid); req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(adap->sge.fw_evtq.abs_id) | diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.c b/drivers/net/ethernet/chelsio/cxgb4/srq.c index 6228a5708307..82b70a565e24 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/srq.c +++ b/drivers/net/ethernet/chelsio/cxgb4/srq.c @@ -84,8 +84,7 @@ int cxgb4_get_srq_entry(struct net_device *dev, if (!skb) return -ENOMEM; req = (struct cpl_srq_table_req *) - __skb_put(skb, sizeof(*req)); - memset(req, 0, sizeof(*req)); + __skb_put_zero(skb, sizeof(*req)); INIT_TP_WR(req, 0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SRQ_TABLE_REQ, TID_TID_V(srq_idx) | diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 51b18035d691..90b5274a8ba1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -145,6 +145,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5016), /* T580-OCP-SO */ CH_PCI_ID_TABLE_FENTRY(0x5017), /* T520-OCP-SO */ CH_PCI_ID_TABLE_FENTRY(0x5018), /* T540-BT */ + CH_PCI_ID_TABLE_FENTRY(0x5019), /* T540-LP-BT */ + CH_PCI_ID_TABLE_FENTRY(0x501a), /* T540-SO-BT */ + CH_PCI_ID_TABLE_FENTRY(0x501b), /* T540-SO-CR */ CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */ CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */ CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index 123e2c1b65f5..4eb15ceddca3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -36,8 +36,8 @@ #define __T4FW_VERSION_H__ #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x10 -#define T4FW_VERSION_MICRO 0x3F +#define T4FW_VERSION_MINOR 0x13 +#define T4FW_VERSION_MICRO 0x01 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -45,8 +45,8 @@ #define T4FW_MIN_VERSION_MICRO 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 
0x10 -#define T5FW_VERSION_MICRO 0x3F +#define T5FW_VERSION_MINOR 0x13 +#define T5FW_VERSION_MICRO 0x01 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -54,8 +54,8 @@ #define T5FW_MIN_VERSION_MICRO 0x00 #define T6FW_VERSION_MAJOR 0x01 -#define T6FW_VERSION_MINOR 0x10 -#define T6FW_VERSION_MICRO 0x3F +#define T6FW_VERSION_MINOR 0x13 +#define T6FW_VERSION_MICRO 0x01 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 9a81b52307a9..71f13bd2b5e4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1419,6 +1419,22 @@ static int cxgb4vf_get_link_ksettings(struct net_device *dev, base->duplex = DUPLEX_UNKNOWN; } + if (pi->link_cfg.fc & PAUSE_RX) { + if (pi->link_cfg.fc & PAUSE_TX) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + Pause); + } else { + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + Asym_Pause); + } + } else if (pi->link_cfg.fc & PAUSE_TX) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + Asym_Pause); + } + base->autoneg = pi->link_cfg.autoneg; if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG) ethtool_link_ksettings_add_link_mode(link_ksettings, diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h index 4b5aacc09cab..240ba9d4c399 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -90,8 +90,7 @@ cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan) { struct cpl_tid_release *req; - req = __skb_put(skb, len); - memset(req, 0, len); + req = __skb_put_zero(skb, len); INIT_TP_WR(req, tid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); @@ -104,8 +103,7 @@ cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, { struct cpl_close_con_req *req; - req = __skb_put(skb, len); - memset(req, 0, len); + req = __skb_put_zero(skb, len); INIT_TP_WR(req, tid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); @@ -119,8 +117,7 @@ cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, { struct cpl_abort_req *req; - req = __skb_put(skb, len); - memset(req, 0, len); + req = __skb_put_zero(skb, len); INIT_TP_WR(req, tid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); @@ -134,8 +131,7 @@ cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan) { struct cpl_abort_rpl *rpl; - rpl = __skb_put(skb, len); - memset(rpl, 0, len); + rpl = __skb_put_zero(skb, len); INIT_TP_WR(rpl, tid); OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); @@ -149,8 +145,7 @@ cxgb_mk_rx_data_ack(struct sk_buff *skb, u32 len, u32 tid, u16 chan, { struct cpl_rx_data_ack *req; - req = __skb_put(skb, len); - memset(req, 0, len); + req = __skb_put_zero(skb, len); INIT_TP_WR(req, tid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid)); diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 6552d68ea6e1..ce6e24c74978 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1391,12 +1391,10 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params) /* FM_WRONG_RESET_VALUES_ERRATA_FMAN_A005127 Errata * workaround */ - if 
(port->rev_info.major >= 6) { - u32 reg; + u32 reg; - reg = 0x00001013; - iowrite32be(reg, &port->bmi_regs->tx.fmbm_tfp); - } + reg = 0x00001013; + iowrite32be(reg, &port->bmi_regs->tx.fmbm_tfp); } return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 8c55965a66ac..c4e295094da7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -502,7 +502,7 @@ static int hns3_get_l4_protocol(struct sk_buff *skb, u8 *ol4_proto, /* find outer header point */ l3.hdr = skb_network_header(skb); - l4_hdr = skb_inner_transport_header(skb); + l4_hdr = skb_transport_header(skb); if (skb->protocol == htons(ETH_P_IPV6)) { exthdr = l3.hdr + sizeof(*l3.v6); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index ff13d1876d9e..fab70683bbf7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -31,6 +31,17 @@ static int hclge_ring_space(struct hclge_cmq_ring *ring) return ring->desc_num - used - 1; } +static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int h) +{ + int u = ring->next_to_use; + int c = ring->next_to_clean; + + if (unlikely(h >= ring->desc_num)) + return 0; + + return u > c ? (h > c && h <= u) : (h > c || h <= u); +} + static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring) { int size = ring->desc_num * sizeof(struct hclge_desc); @@ -141,6 +152,7 @@ static void hclge_cmd_init_regs(struct hclge_hw *hw) static int hclge_cmd_csq_clean(struct hclge_hw *hw) { + struct hclge_dev *hdev = (struct hclge_dev *)hw->back; struct hclge_cmq_ring *csq = &hw->cmq.csq; u16 ntc = csq->next_to_clean; struct hclge_desc *desc; @@ -149,6 +161,13 @@ static int hclge_cmd_csq_clean(struct hclge_hw *hw) desc = &csq->desc[ntc]; head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG); + rmb(); /* Make sure head is ready before touch any data */ + + if (!is_valid_csq_clean_head(csq, head)) { + dev_warn(&hdev->pdev->dev, "wrong head (%d, %d-%d)\n", head, + csq->next_to_use, csq->next_to_clean); + return 0; + } while (head != ntc) { memset(desc, 0, sizeof(*desc)); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2066dd734444..dd5d65c9cca6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -304,8 +304,6 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = { HCLGE_MAC_STATS_FIELD_OFF(mac_tx_2048_4095_oct_pkt_num)}, {"mac_tx_4096_8191_oct_pkt_num", HCLGE_MAC_STATS_FIELD_OFF(mac_tx_4096_8191_oct_pkt_num)}, - {"mac_tx_8192_12287_oct_pkt_num", - HCLGE_MAC_STATS_FIELD_OFF(mac_tx_8192_12287_oct_pkt_num)}, {"mac_tx_8192_9216_oct_pkt_num", HCLGE_MAC_STATS_FIELD_OFF(mac_tx_8192_9216_oct_pkt_num)}, {"mac_tx_9217_12287_oct_pkt_num", @@ -356,8 +354,6 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = { HCLGE_MAC_STATS_FIELD_OFF(mac_rx_2048_4095_oct_pkt_num)}, {"mac_rx_4096_8191_oct_pkt_num", HCLGE_MAC_STATS_FIELD_OFF(mac_rx_4096_8191_oct_pkt_num)}, - {"mac_rx_8192_12287_oct_pkt_num", - HCLGE_MAC_STATS_FIELD_OFF(mac_rx_8192_12287_oct_pkt_num)}, {"mac_rx_8192_9216_oct_pkt_num", HCLGE_MAC_STATS_FIELD_OFF(mac_rx_8192_9216_oct_pkt_num)}, {"mac_rx_9217_12287_oct_pkt_num", @@ -1459,8 +1455,11 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) /* We need to 
alloc a vport for main NIC of PF */ num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; - if (hdev->num_tqps < num_vport) - num_vport = hdev->num_tqps; + if (hdev->num_tqps < num_vport) { + dev_err(&hdev->pdev->dev, "tqps(%d) is less than vports(%d)", + hdev->num_tqps, num_vport); + return -EINVAL; + } /* Alloc the same number of TQPs for every vport */ tqp_per_vport = hdev->num_tqps / num_vport; @@ -4540,8 +4539,9 @@ static void hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable) hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, enable); } -int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, - bool is_kill, u16 vlan, u8 qos, __be16 proto) +static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, + bool is_kill, u16 vlan, u8 qos, + __be16 proto) { #define HCLGE_MAX_VF_BYTES 16 struct hclge_vlan_filter_vf_cfg_cmd *req0; @@ -4599,12 +4599,9 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, return -EIO; } -static int hclge_set_port_vlan_filter(struct hnae3_handle *handle, - __be16 proto, u16 vlan_id, - bool is_kill) +static int hclge_set_port_vlan_filter(struct hclge_dev *hdev, __be16 proto, + u16 vlan_id, bool is_kill) { - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; struct hclge_vlan_filter_pf_cfg_cmd *req; struct hclge_desc desc; u8 vlan_offset_byte_val; @@ -4624,22 +4621,66 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle, req->vlan_offset_bitmap[vlan_offset_byte] = vlan_offset_byte_val; ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "port vlan command, send fail, ret =%d.\n", ret); + return ret; +} + +static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto, + u16 vport_id, u16 vlan_id, u8 qos, + bool is_kill) +{ + u16 vport_idx, vport_num = 0; + int ret; + + ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id, + 0, proto); if (ret) { dev_err(&hdev->pdev->dev, - "port vlan command, send fail, ret =%d.\n", - ret); + "Set %d vport vlan filter config fail, ret =%d.\n", + vport_id, ret); return ret; } - ret = hclge_set_vf_vlan_common(hdev, 0, is_kill, vlan_id, 0, proto); - if (ret) { + /* vlan 0 may be added twice when 8021q module is enabled */ + if (!is_kill && !vlan_id && + test_bit(vport_id, hdev->vlan_table[vlan_id])) + return 0; + + if (!is_kill && test_and_set_bit(vport_id, hdev->vlan_table[vlan_id])) { dev_err(&hdev->pdev->dev, - "Set pf vlan filter config fail, ret =%d.\n", - ret); - return -EIO; + "Add port vlan failed, vport %d is already in vlan %d\n", + vport_id, vlan_id); + return -EINVAL; } - return 0; + if (is_kill && + !test_and_clear_bit(vport_id, hdev->vlan_table[vlan_id])) { + dev_err(&hdev->pdev->dev, + "Delete port vlan failed, vport %d is not in vlan %d\n", + vport_id, vlan_id); + return -EINVAL; + } + + for_each_set_bit(vport_idx, hdev->vlan_table[vlan_id], VLAN_N_VID) + vport_num++; + + if ((is_kill && vport_num == 0) || (!is_kill && vport_num == 1)) + ret = hclge_set_port_vlan_filter(hdev, proto, vlan_id, + is_kill); + + return ret; +} + +int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, + u16 vlan_id, bool is_kill) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + return hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id, vlan_id, + 0, is_kill); } static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, @@ -4653,7 +4694,7 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle 
*handle, int vfid, if (proto != htons(ETH_P_8021Q)) return -EPROTONOSUPPORT; - return hclge_set_vf_vlan_common(hdev, vfid, false, vlan, qos, proto); + return hclge_set_vlan_filter_hw(hdev, proto, vfid, vlan, qos, false); } static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport) @@ -4818,7 +4859,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev) } handle = &hdev->vport[0].nic; - return hclge_set_port_vlan_filter(handle, htons(ETH_P_8021Q), 0, false); + return hclge_set_vlan_filter(handle, htons(ETH_P_8021Q), 0, false); } static int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) @@ -5166,12 +5207,6 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, struct phy_device *phydev = hdev->hw.mac.phydev; u32 fc_autoneg; - /* Only support flow control negotiation for netdev with - * phy attached for now. - */ - if (!phydev) - return -EOPNOTSUPP; - fc_autoneg = hclge_get_autoneg(handle); if (auto_neg != fc_autoneg) { dev_info(&hdev->pdev->dev, @@ -5190,6 +5225,12 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, if (!fc_autoneg) return hclge_cfg_pauseparam(hdev, rx_en, tx_en); + /* Only support flow control negotiation for netdev with + * phy attached for now. + */ + if (!phydev) + return -EOPNOTSUPP; + return phy_start_aneg(phydev); } @@ -5427,7 +5468,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL); if (!hdev) { ret = -ENOMEM; - goto err_hclge_dev; + goto out; } hdev->pdev = pdev; @@ -5440,38 +5481,38 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) ret = hclge_pci_init(hdev); if (ret) { dev_err(&pdev->dev, "PCI init failed\n"); - goto err_pci_init; + goto out; } /* Firmware command queue initialize */ ret = hclge_cmd_queue_init(hdev); if (ret) { dev_err(&pdev->dev, "Cmd queue init failed, ret = %d.\n", ret); - return ret; + goto err_pci_uninit; } /* Firmware command initialize */ ret = hclge_cmd_init(hdev); if (ret) - goto err_cmd_init; + goto err_cmd_uninit; ret = hclge_get_cap(hdev); if (ret) { dev_err(&pdev->dev, "get hw capability error, ret = %d.\n", ret); - return ret; + goto err_cmd_uninit; } ret = hclge_configure(hdev); if (ret) { dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret); - return ret; + goto err_cmd_uninit; } ret = hclge_init_msi(hdev); if (ret) { dev_err(&pdev->dev, "Init MSI/MSI-X error, ret = %d.\n", ret); - return ret; + goto err_cmd_uninit; } ret = hclge_misc_irq_init(hdev); @@ -5479,69 +5520,71 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) dev_err(&pdev->dev, "Misc IRQ(vector0) init error, ret = %d.\n", ret); - return ret; + goto err_msi_uninit; } ret = hclge_alloc_tqps(hdev); if (ret) { dev_err(&pdev->dev, "Allocate TQPs error, ret = %d.\n", ret); - return ret; + goto err_msi_irq_uninit; } ret = hclge_alloc_vport(hdev); if (ret) { dev_err(&pdev->dev, "Allocate vport error, ret = %d.\n", ret); - return ret; + goto err_msi_irq_uninit; } ret = hclge_map_tqp(hdev); if (ret) { dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret); - return ret; + goto err_sriov_disable; } - ret = hclge_mac_mdio_config(hdev); - if (ret) { - dev_warn(&hdev->pdev->dev, - "mdio config fail ret=%d\n", ret); - return ret; + if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { + ret = hclge_mac_mdio_config(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, + "mdio config fail ret=%d\n", ret); + goto err_sriov_disable; + } } ret = hclge_mac_init(hdev); if (ret) { dev_err(&pdev->dev, "Mac init 
error, ret = %d\n", ret); - return ret; + goto err_mdiobus_unreg; } ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX); if (ret) { dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret); - return ret; + goto err_mdiobus_unreg; } ret = hclge_init_vlan_config(hdev); if (ret) { dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret); - return ret; + goto err_mdiobus_unreg; } ret = hclge_tm_schd_init(hdev); if (ret) { dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret); - return ret; + goto err_mdiobus_unreg; } hclge_rss_init_cfg(hdev); ret = hclge_rss_init_hw(hdev); if (ret) { dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret); - return ret; + goto err_mdiobus_unreg; } ret = init_mgr_tbl(hdev); if (ret) { dev_err(&pdev->dev, "manager table init fail, ret =%d\n", ret); - return ret; + goto err_mdiobus_unreg; } hclge_dcb_ops_set(hdev); @@ -5564,11 +5607,24 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) pr_info("%s driver initialization finished.\n", HCLGE_DRIVER_NAME); return 0; -err_cmd_init: +err_mdiobus_unreg: + if (hdev->hw.mac.phydev) + mdiobus_unregister(hdev->hw.mac.mdio_bus); +err_sriov_disable: + if (IS_ENABLED(CONFIG_PCI_IOV)) + hclge_disable_sriov(hdev); +err_msi_irq_uninit: + hclge_misc_irq_uninit(hdev); +err_msi_uninit: + pci_free_irq_vectors(pdev); +err_cmd_uninit: + hclge_destroy_cmd_queue(&hdev->hw); +err_pci_uninit: + pci_clear_master(pdev); pci_release_regions(pdev); -err_pci_init: + pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); -err_hclge_dev: +out: return ret; } @@ -5586,6 +5642,7 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) set_bit(HCLGE_STATE_DOWN, &hdev->state); hclge_stats_clear(hdev); + memset(hdev->vlan_table, 0, sizeof(hdev->vlan_table)); ret = hclge_cmd_init(hdev); if (ret) { @@ -6203,7 +6260,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_fw_version = hclge_get_fw_version, .get_mdix_mode = hclge_get_mdix_mode, .enable_vlan_filter = hclge_enable_vlan_filter, - .set_vlan_filter = hclge_set_port_vlan_filter, + .set_vlan_filter = hclge_set_vlan_filter, .set_vf_vlan_filter = hclge_set_vf_vlan_filter, .enable_hw_strip_rxvtag = hclge_en_hw_strip_rxvtag, .reset_event = hclge_reset_event, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 0f4157e71282..b7ee91daea0c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -12,6 +12,8 @@ #include <linux/fs.h> #include <linux/types.h> #include <linux/phy.h> +#include <linux/if_vlan.h> + #include "hclge_cmd.h" #include "hnae3.h" @@ -406,9 +408,9 @@ struct hclge_mac_stats { u64 mac_tx_1519_2047_oct_pkt_num; u64 mac_tx_2048_4095_oct_pkt_num; u64 mac_tx_4096_8191_oct_pkt_num; - u64 mac_tx_8192_12287_oct_pkt_num; /* valid for GE MAC only */ - u64 mac_tx_8192_9216_oct_pkt_num; /* valid for LGE & CGE MAC only */ - u64 mac_tx_9217_12287_oct_pkt_num; /* valid for LGE & CGE MAC */ + u64 rsv0; + u64 mac_tx_8192_9216_oct_pkt_num; + u64 mac_tx_9217_12287_oct_pkt_num; u64 mac_tx_12288_16383_oct_pkt_num; u64 mac_tx_1519_max_good_oct_pkt_num; u64 mac_tx_1519_max_bad_oct_pkt_num; @@ -433,9 +435,9 @@ struct hclge_mac_stats { u64 mac_rx_1519_2047_oct_pkt_num; u64 mac_rx_2048_4095_oct_pkt_num; u64 mac_rx_4096_8191_oct_pkt_num; - u64 mac_rx_8192_12287_oct_pkt_num;/* valid for GE MAC only */ - u64 mac_rx_8192_9216_oct_pkt_num; /* valid for LGE & CGE MAC only */ - u64 mac_rx_9217_12287_oct_pkt_num; /* valid for LGE & CGE MAC 
only */ + u64 rsv1; + u64 mac_rx_8192_9216_oct_pkt_num; + u64 mac_rx_9217_12287_oct_pkt_num; u64 mac_rx_12288_16383_oct_pkt_num; u64 mac_rx_1519_max_good_oct_pkt_num; u64 mac_rx_1519_max_bad_oct_pkt_num; @@ -471,6 +473,7 @@ struct hclge_vlan_type_cfg { u16 tx_in_vlan_type; }; +#define HCLGE_VPORT_NUM 256 struct hclge_dev { struct pci_dev *pdev; struct hnae3_ae_dev *ae_dev; @@ -562,6 +565,7 @@ struct hclge_dev { u64 rx_pkts_for_led; u64 tx_pkts_for_led; + unsigned long vlan_table[VLAN_N_VID][BITS_TO_LONGS(HCLGE_VPORT_NUM)]; }; /* VPort level vlan tag configuration for TX direction */ @@ -646,8 +650,8 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue) } int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex); -int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid, - bool is_kill, u16 vlan, u8 qos, __be16 proto); +int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, + u16 vlan_id, bool is_kill); int hclge_buffer_alloc(struct hclge_dev *hdev); int hclge_rss_init_hw(struct hclge_dev *hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index a6f7ffa9c259..7563335b0c7f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -264,19 +264,18 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req, bool gen_resp) { - struct hclge_dev *hdev = vport->back; int status = 0; if (mbx_req->msg[1] == HCLGE_MBX_VLAN_FILTER) { + struct hnae3_handle *handle = &vport->nic; u16 vlan, proto; bool is_kill; is_kill = !!mbx_req->msg[2]; memcpy(&vlan, &mbx_req->msg[3], sizeof(vlan)); memcpy(&proto, &mbx_req->msg[5], sizeof(proto)); - status = hclge_set_vf_vlan_common(hdev, vport->vport_id, - is_kill, vlan, 0, - cpu_to_be16(proto)); + status = hclge_set_vlan_filter(handle, cpu_to_be16(proto), + vlan, is_kill); } if (gen_resp) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 682c2d6618e7..9f7932e423b5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -140,8 +140,11 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev) struct mii_bus *mdio_bus; int ret; - if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) - return 0; + if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) { + dev_err(&hdev->pdev->dev, "phy_addr(%d) is too large.\n", + hdev->hw.mac.phy_addr); + return -EINVAL; + } mdio_bus = devm_mdiobus_alloc(&hdev->pdev->dev); if (!mdio_bus) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 885f25cd7be4..c69ecab460f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -134,11 +134,8 @@ static int hclge_pfc_stats_get(struct hclge_dev *hdev, } ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM); - if (ret) { - dev_err(&hdev->pdev->dev, - "Get pfc pause stats fail, ret = %d.\n", ret); + if (ret) return ret; - } for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) { struct hclge_pfc_stats_cmd *pfc_stats = diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig index 08db24954f7e..e4e8b24c1a5d 100644 --- a/drivers/net/ethernet/huawei/hinic/Kconfig +++ b/drivers/net/ethernet/huawei/hinic/Kconfig @@ -4,7 +4,7 @@ 
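
The hclge_cmd_csq_clean() change above validates the head index that hardware reports before walking the ring: after the rmb(), a head value is plausible only if it lies in the half-open window (next_to_clean, next_to_use], which may wrap past the end of the descriptor array. A minimal standalone model of that predicate (illustrative names, not the kernel structs) looks like this:

	#include <assert.h>
	#include <stdbool.h>

	struct ring {
		int desc_num;		/* total descriptors in the ring */
		int next_to_use;	/* next slot software will post to */
		int next_to_clean;	/* first slot not yet reclaimed */
	};

	/*
	 * Mirrors the predicate added as is_valid_csq_clean_head() above:
	 * a reported head is accepted only inside (next_to_clean,
	 * next_to_use], accounting for wraparound.
	 */
	static bool head_is_valid(const struct ring *r, int h)
	{
		int u = r->next_to_use;
		int c = r->next_to_clean;

		if (h >= r->desc_num)			/* off the end of the ring */
			return false;

		return u > c ? (h > c && h <= u)	/* window does not wrap */
			     : (h > c || h <= u);	/* window wraps around */
	}

	int main(void)
	{
		struct ring r = { .desc_num = 8, .next_to_use = 2, .next_to_clean = 6 };

		assert(head_is_valid(&r, 7));	/* in window, before the wrap */
		assert(head_is_valid(&r, 1));	/* in window, after the wrap */
		assert(!head_is_valid(&r, 4));	/* between use and clean: bogus */
		assert(!head_is_valid(&r, 9));	/* beyond the ring: bogus */
		return 0;
	}

Rejecting an out-of-window head lets the caller log the "wrong head" warning and skip the clean pass, rather than corrupting next_to_clean with a bogus index.
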
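
The VLAN rework in hclge_set_vlan_filter_hw() keeps one vport bitmap per VLAN ID (the new vlan_table[VLAN_N_VID][BITS_TO_LONGS(HCLGE_VPORT_NUM)] field) so the shared port-level filter is programmed only when the first vport joins a VLAN and torn down only when the last one leaves; the kernel code uses test_and_set_bit()/test_and_clear_bit() plus for_each_set_bit() to track membership. A simplified userspace sketch of the same bookkeeping, with hypothetical names and a single 64-bit word (vports < 64) standing in for the per-VLAN bitmap:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define MAX_VLANS 4096			/* VLAN_N_VID in the kernel */

	/* Bit N set in vlan_member[v] means vport N is a member of VLAN v. */
	static uint64_t vlan_member[MAX_VLANS];

	/*
	 * Returns true when the shared port-level filter must be touched:
	 * the first vport just joined the VLAN, or the last one just left.
	 * (The driver additionally rejects duplicate adds of a nonzero
	 * VLAN with -EINVAL; here they are simply no-ops.)
	 */
	static bool vlan_update(uint16_t vlan, unsigned int vport, bool kill)
	{
		uint64_t mask = 1ULL << vport;

		if (!kill) {
			if (vlan_member[vlan] & mask)		/* already a member */
				return false;
			vlan_member[vlan] |= mask;
			return vlan_member[vlan] == mask;	/* first member in */
		}

		if (!(vlan_member[vlan] & mask))		/* not a member */
			return false;
		vlan_member[vlan] &= ~mask;
		return vlan_member[vlan] == 0;			/* last member out */
	}

	int main(void)
	{
		printf("%d\n", vlan_update(100, 3, false));	/* 1: program filter */
		printf("%d\n", vlan_update(100, 5, false));	/* 0: already set */
		printf("%d\n", vlan_update(100, 3, true));	/* 0: vport 5 remains */
		printf("%d\n", vlan_update(100, 5, true));	/* 1: remove filter */
		return 0;
	}

The vlan 0 special case in the hunk falls out of the same table: with the 8021q module loaded, vlan 0 can legitimately be added twice, so a duplicate add of vlan 0 short-circuits to success instead of being treated as a conflict.
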
config HINIC tristate "Huawei Intelligent PCIE Network Interface Card" - depends on (PCI_MSI && X86) + depends on (PCI_MSI && (X86 || ARM64)) ---help--- This driver supports HiNIC PCIE Ethernet cards. To compile this driver as part of the kernel, choose Y here. diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 41ad56edfb96..27d5f27163d2 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1,31 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel PRO/100 Linux driver - Copyright(c) 1999 - 2006 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2006 Intel Corporation. */ /* * e100.c: Intel(R) PRO/100 ethernet driver diff --git a/drivers/net/ethernet/intel/e1000/Makefile b/drivers/net/ethernet/intel/e1000/Makefile index c7caadd3c8af..314c52d44b7c 100644 --- a/drivers/net/ethernet/intel/e1000/Makefile +++ b/drivers/net/ethernet/intel/e1000/Makefile @@ -1,31 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel PRO/1000 Linux driver # Copyright(c) 1999 - 2006 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Contact Information: -# Linux NICS <linux.nics@intel.com> -# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> -# Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 -# -################################################################################ # # Makefile for the Intel(R) PRO/1000 ethernet driver diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index 3a0feea2df54..c40729b2c184 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -1,32 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2006 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - +/* Copyright(c) 1999 - 2006 Intel Corporation. */ /* Linux PRO/1000 Ethernet Driver main header file */ diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 3e80ca170dd7..5d365a986bb0 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1,26 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2006 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 1999 - 2006 Intel Corporation. 
*/ /* ethtool support for e1000 */ diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 6e7e923d57bf..48428d6a00be 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -1,31 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* -* - Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2006 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - - */ +/* Copyright(c) 1999 - 2006 Intel Corporation. */ /* e1000_hw.c * Shared functions for accessing and configuring the MAC diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.h b/drivers/net/ethernet/intel/e1000/e1000_hw.h index f09c569ec19b..b57a04954ccf 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.h +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2006 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2006 Intel Corporation. 
*/ /* e1000_hw.h * Structures, enums, and macros for the MAC diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index d5eb19b86a0a..2110d5f2da19 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -1,31 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2006 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2006 Intel Corporation. */ #include "e1000.h" #include <net/ip6_checksum.h> diff --git a/drivers/net/ethernet/intel/e1000/e1000_osdep.h b/drivers/net/ethernet/intel/e1000/e1000_osdep.h index ae0559b8b011..e966bb290797 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_osdep.h +++ b/drivers/net/ethernet/intel/e1000/e1000_osdep.h @@ -1,32 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2006 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - +/* Copyright(c) 1999 - 2006 Intel Corporation. 
*/ /* glue for the OS independent part of e1000 * includes register access macros diff --git a/drivers/net/ethernet/intel/e1000/e1000_param.c b/drivers/net/ethernet/intel/e1000/e1000_param.c index 345f23927bcc..d3f29ffe1e47 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_param.c +++ b/drivers/net/ethernet/intel/e1000/e1000_param.c @@ -1,31 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2006 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2006 Intel Corporation. */ #include "e1000.h" diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index 953e99df420c..257bd59bc9c6 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ /* 80003ES2LAN Gigabit Ethernet Controller (Copper) * 80003ES2LAN Gigabit Ethernet Controller (Serdes) diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.h b/drivers/net/ethernet/intel/e1000e/80003es2lan.h index ee6d1256fda4..aa9d639c6cbb 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.h +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000E_80003ES2LAN_H_ #define _E1000E_80003ES2LAN_H_ diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 924f2c8dfa6c..b9309302c29e 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ /* 82571EB Gigabit Ethernet Controller * 82571EB Gigabit Ethernet Controller (Copper) diff --git a/drivers/net/ethernet/intel/e1000e/82571.h b/drivers/net/ethernet/intel/e1000e/82571.h index 9a24c645f726..834c238d02db 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.h +++ b/drivers/net/ethernet/intel/e1000e/82571.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #ifndef _E1000E_82571_H_ #define _E1000E_82571_H_ diff --git a/drivers/net/ethernet/intel/e1000e/Makefile b/drivers/net/ethernet/intel/e1000e/Makefile index 24e391a4ac68..44e58b6e7660 100644 --- a/drivers/net/ethernet/intel/e1000e/Makefile +++ b/drivers/net/ethernet/intel/e1000e/Makefile @@ -1,30 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel PRO/1000 Linux driver -# Copyright(c) 1999 - 2014 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see <http://www.gnu.org/licenses/>. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Contact Information: -# Linux NICS <linux.nics@intel.com> -# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> -# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 -# -################################################################################ +# Copyright(c) 1999 - 2018 Intel Corporation. # # Makefile for the Intel(R) PRO/1000 ethernet driver diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 22883015a695..fd550dee4982 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000_DEFINES_H_ #define _E1000_DEFINES_H_ diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index da88555ba1fd..c760dc72c520 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ /* Linux PRO/1000 Ethernet Driver main header file */ diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 64dc0c11147f..e084cb734eb1 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ /* ethtool support for e1000 */ diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 21802396bed6..eff75bd8a8f0 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000_HW_H_ #define _E1000_HW_H_ diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 1551d6ce5341..cdae0efde8e6 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ /* 82562G 10/100 Network Connection * 82562G-2 10/100 Network Connection diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 3c4f82c21084..eb09c755fa17 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000E_ICH8LAN_H_ #define _E1000E_ICH8LAN_H_ diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index b293464a9f27..4abd55d646c5 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #include "e1000.h" diff --git a/drivers/net/ethernet/intel/e1000e/mac.h b/drivers/net/ethernet/intel/e1000e/mac.h index cb0abf6c76a5..6ab261119801 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.h +++ b/drivers/net/ethernet/intel/e1000e/mac.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000E_MAC_H_ #define _E1000E_MAC_H_ diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c index e027660aeb92..c4c9b20bc51f 100644 --- a/drivers/net/ethernet/intel/e1000e/manage.c +++ b/drivers/net/ethernet/intel/e1000e/manage.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include "e1000.h" diff --git a/drivers/net/ethernet/intel/e1000e/manage.h b/drivers/net/ethernet/intel/e1000e/manage.h index 3268f2e58593..d868aad806d4 100644 --- a/drivers/net/ethernet/intel/e1000e/manage.h +++ b/drivers/net/ethernet/intel/e1000e/manage.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". 
- * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000E_MANAGE_H_ #define _E1000E_MANAGE_H_ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index ec4a9759a6f2..d3fef7fefea8 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c index 68949bb41b7b..937f9af22d26 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.c +++ b/drivers/net/ethernet/intel/e1000e/nvm.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include "e1000.h" diff --git a/drivers/net/ethernet/intel/e1000e/nvm.h b/drivers/net/ethernet/intel/e1000e/nvm.h index 8e082028be7d..6a30dfea4117 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.h +++ b/drivers/net/ethernet/intel/e1000e/nvm.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000E_NVM_H_ #define _E1000E_NVM_H_ diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c index 2def33eba9e6..098369fd3e65 100644 --- a/drivers/net/ethernet/intel/e1000e/param.c +++ b/drivers/net/ethernet/intel/e1000e/param.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include <linux/netdevice.h> #include <linux/module.h> diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index b8226ed0e338..42233019255a 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include "e1000.h" diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h index d4180b5e9196..c48777d09523 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.h +++ b/drivers/net/ethernet/intel/e1000e/phy.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000E_PHY_H_ #define _E1000E_PHY_H_ diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index f941e5085f44..37c76945ad9b 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -1,24 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ /* PTP 1588 Hardware Clock (PHC) * Derived from PTP Hardware Clock driver for Intel 82576 and 82580 (igb) diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h index 16afc3c2a986..47f5ca793970 100644 --- a/drivers/net/ethernet/intel/e1000e/regs.h +++ b/drivers/net/ethernet/intel/e1000e/regs.h @@ -1,24 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel PRO/1000 Linux driver - * Copyright(c) 1999 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #ifndef _E1000E_REGS_H_ #define _E1000E_REGS_H_ diff --git a/drivers/net/ethernet/intel/fm10k/Makefile b/drivers/net/ethernet/intel/fm10k/Makefile index 93277cb99cb7..26a9746ccb14 100644 --- a/drivers/net/ethernet/intel/fm10k/Makefile +++ b/drivers/net/ethernet/intel/fm10k/Makefile @@ -1,26 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel(R) Ethernet Switch Host Interface Driver -# Copyright(c) 2013 - 2016 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Contact Information: -# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> -# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 -# -################################################################################ +# Copyright(c) 2013 - 2018 Intel Corporation. # # Makefile for the Intel(R) Ethernet Switch Host Interface Driver diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index a9cdf763c59d..a903a0ba45e1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -1,23 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _FM10K_H_ #define _FM10K_H_ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index e303d88720ef..f51a63fca513 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -1,23 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "fm10k_common.h" diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h index 2bdb24d2ca9d..4c48fb73b3e7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h @@ -1,23 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _FM10K_COMMON_H_ #define _FM10K_COMMON_H_ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c index c4f733452ef2..20768ac7f17e 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c @@ -1,23 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "fm10k.h" diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c index 43e8d839831f..dca104121c05 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c @@ -1,23 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "fm10k.h" diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 28b6b4e56487..eeac2b75a195 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -1,23 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include <linux/vmalloc.h> diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 30395f5e5e87..e707d717012f 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -1,23 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "fm10k.h" #include "fm10k_vf.h" diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index df8607097e4a..3f536541f45f 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1,23 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. 
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */

 #include <linux/types.h>
 #include <linux/module.h>
@@ -445,15 +427,14 @@ static void fm10k_type_trans(struct fm10k_ring *rx_ring,
 		l2_accel = NULL;
 	}

-	skb->protocol = eth_type_trans(skb, dev);
-
 	/* Record Rx queue, or update macvlan statistics */
 	if (!l2_accel)
 		skb_record_rx_queue(skb, rx_ring->queue_index);
 	else
 		macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
-				 (skb->pkt_type == PACKET_BROADCAST) ||
-				 (skb->pkt_type == PACKET_MULTICAST));
+				 false);
+
+	skb->protocol = eth_type_trans(skb, dev);
 }

 /**
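The fm10k_type_trans() hunk above defers eth_type_trans() until after the macvlan accounting and stops deriving the multicast flag from skb->pkt_type: the hardware filters only on destination MAC (see the comment added in fm10k_netdev.c below), so broadcast/multicast delivery cannot be assumed and the flag becomes plain false. Since eth_type_trans() also consumes the Ethernet header and classifies skb->pkt_type against the netdev it is handed, calling it last keeps that classification out of the statistics path. A minimal sketch of the resulting ordering; the helper name and the is_macvlan_offload flag are illustrative, not part of the driver:

#include <linux/etherdevice.h>
#include <linux/if_macvlan.h>

static void rx_finalize(struct sk_buff *skb, struct net_device *dev,
			bool is_macvlan_offload)
{
	if (is_macvlan_offload)
		/* Account the frame against the macvlan first; pass
		 * multicast=false because the NIC matched on the
		 * destination MAC only. */
		macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN,
				 true, false);

	/* Consumes the header, sets skb->protocol and skb->pkt_type. */
	skb->protocol = eth_type_trans(skb, dev);
}
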
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index c01bf30a0c9e..21021fe4f1c3 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */

 #include "fm10k_common.h"

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
index 007e1dfa9b7a..56d1abff04e2 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
@@ -1,23 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */

 #ifndef _FM10K_MBX_H_
 #define _FM10K_MBX_H_

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 45793491d4ba..c879af72bbf5 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1,27 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */

 #include "fm10k.h"
 #include <linux/vmalloc.h>
 #include <net/udp_tunnel.h>
+#include <linux/if_macvlan.h>

 /**
  * fm10k_setup_tx_resources - allocate Tx resources (Descriptors)
@@ -1254,7 +1237,7 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
 			glort = l2_accel->dglort + 1 + i;

 			hw->mac.ops.update_xcast_mode(hw, glort,
-						      FM10K_XCAST_MODE_MULTI);
+						      FM10K_XCAST_MODE_NONE);
 			fm10k_queue_mac_request(interface, glort,
 						sdev->dev_addr,
 						hw->mac.default_vid, true);
@@ -1449,6 +1432,13 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
 	int size = 0, i;
 	u16 glort;

+	/* The hardware supported by fm10k only filters on the destination MAC
+	 * address. In order to avoid issues we only support offloading modes
+	 * where the hardware can actually provide the functionality.
+	 */
+	if (!macvlan_supports_dest_filter(sdev))
+		return ERR_PTR(-EMEDIUMTYPE);
+
 	/* allocate l2 accel structure if it is not available */
 	if (!l2_accel) {
 		/* verify there is enough free GLORTs to support l2_accel */
@@ -1515,7 +1505,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,

 	if (fm10k_host_mbx_ready(interface)) {
 		hw->mac.ops.update_xcast_mode(hw, glort,
-					      FM10K_XCAST_MODE_MULTI);
+					      FM10K_XCAST_MODE_NONE);
 		fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
 					hw->mac.default_vid, true);
 	}

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index c4a2b688b38b..15071e4adb98 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1,23 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
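The fm10k_dfwd_add_station() guard above pairs with the two xcast-mode changes: because the hardware can only mimic a destination-MAC filter, the driver now rejects macvlan modes whose delivery decision needs anything more (macvlan source mode, notably), and with filtering then guaranteed it programs the station's glort with FM10K_XCAST_MODE_NONE instead of FM10K_XCAST_MODE_MULTI. A sketch of the guard under those assumptions; the function is illustrative, while macvlan_supports_dest_filter() and ERR_PTR() are real kernel APIs:

#include <linux/err.h>
#include <linux/if_macvlan.h>

static void *example_dfwd_add_station(struct net_device *dev,
				      struct net_device *sdev)
{
	/* Private, VEPA and bridge macvlans deliver by a pure
	 * destination-MAC match; anything else cannot be backed by
	 * DMAC-only hardware filtering, so refuse the offload. */
	if (!macvlan_supports_dest_filter(sdev))
		return ERR_PTR(-EMEDIUMTYPE);

	/* ... reserve a glort and rings for sdev here ... */
	return sdev;
}
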
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include <linux/module.h> #include <linux/interrupt.h> diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 7ba54c534f8c..8f0a99b6a537 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,23 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "fm10k_pf.h" #include "fm10k_vf.h" diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h index ae81f9a16602..8e814df709d2 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h @@ -1,23 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _FM10K_PF_H_ #define _FM10K_PF_H_ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c index 725ecb7abccd..2a7a40bf2b1c 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c @@ -1,23 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2018 Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "fm10k_tlv.h" diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h index 5d2ee759507e..160bc5b78f99 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h @@ -1,23 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _FM10K_TLV_H_ #define _FM10K_TLV_H_ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index dd23af11e2c1..3e608e493f9d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -1,23 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _FM10K_TYPE_H_ #define _FM10K_TYPE_H_ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c index f06913630b39..a8519c1f0406 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c @@ -1,23 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "fm10k_vf.h" diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h index 66a66b73a2f1..787d0d570a28 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h @@ -1,23 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _FM10K_VF_H_ #define _FM10K_VF_H_ diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile index 75437768a07c..14397e7e9925 100644 --- a/drivers/net/ethernet/intel/i40e/Makefile +++ b/drivers/net/ethernet/intel/i40e/Makefile @@ -1,29 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel Ethernet Controller XL710 Family Linux Driver -# Copyright(c) 2013 - 2015 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. 
If not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
+# Copyright(c) 2013 - 2018 Intel Corporation.
 #
 # Makefile for the Intel(R) Ethernet Connection XL710 (i40e.ko) driver

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index a44139c1de80..7a80652e2500 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */

 #ifndef _I40E_H_
 #define _I40E_H_
@@ -334,10 +310,12 @@ struct i40e_tc_configuration {
 	struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS];
 };

+#define I40E_UDP_PORT_INDEX_UNUSED 255
 struct i40e_udp_port_config {
 	/* AdminQ command interface expects port number in Host byte order */
 	u16 port;
 	u8 type;
+	u8 filter_index;
 };

 /* macros related to FLX_PIT */
@@ -608,7 +586,7 @@ struct i40e_pf {
 	unsigned long ptp_tx_start;
 	struct hwtstamp_config tstamp_config;
 	struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
-	u64 ptp_base_adj;
+	u32 ptp_adj_mult;
 	u32 tx_hwtstamp_timeouts;
 	u32 tx_hwtstamp_skipped;
 	u32 rx_hwtstamp_cleared;
@@ -1009,6 +987,9 @@ void i40e_service_event_schedule(struct i40e_pf *pf);
 void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
 				  u8 *msg, u16 len);

+int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, bool is_xdp,
+			   bool enable);
+int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable);
 int i40e_vsi_start_rings(struct i40e_vsi *vsi);
 void i40e_vsi_stop_rings(struct i40e_vsi *vsi);
 void i40e_vsi_stop_rings_no_wait(struct i40e_vsi *vsi);
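The i40e.h hunks above are mostly bookkeeping. Firmware hands back a filter index when a UDP tunnel port is programmed, and deleting that filter later requires the same index, so each port slot now records it, with I40E_UDP_PORT_INDEX_UNUSED (255) marking slots that own no hardware filter. The ptp_base_adj to ptp_adj_mult change swaps a cached 64-bit base increment for a small multiplier, presumably so the PTP frequency-adjustment path can scale a common base increment per link speed. A sketch of the teardown the new field enables; the helper and struct names are hypothetical, and the AdminQ call is left as a comment since it needs a live struct i40e_hw:

#include <linux/types.h>

#define EXAMPLE_PORT_INDEX_UNUSED 255	/* mirrors I40E_UDP_PORT_INDEX_UNUSED */

struct example_udp_port_config {
	u16 port;		/* host byte order, as the AdminQ expects */
	u8 type;
	u8 filter_index;	/* returned by firmware on add */
};

static void example_udp_port_del(struct example_udp_port_config *cfg)
{
	/* Only ports the firmware actually accepted have an index. */
	if (cfg->filter_index == EXAMPLE_PORT_INDEX_UNUSED)
		return;

	/* i40e_aq_del_udp_tunnel(hw, cfg->filter_index, NULL); */
	cfg->filter_index = EXAMPLE_PORT_INDEX_UNUSED;
}
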
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 843fc7781ef8..ddbea79d18e5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -1,29 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */

 #include "i40e_status.h"
 #include "i40e_type.h"

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
index 0a8749ee9fd3..edec3df78971 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E.
Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_ADMINQ_H_ #define _I40E_ADMINQ_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 0244923edeb8..7d888e05f96f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_ADMINQ_CMD_H_ #define _I40E_ADMINQ_CMD_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h index abed0c52e782..cb8689222c8b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_alloc.h +++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_ALLOC_H_ #define _I40E_ALLOC_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index d8ce4999864f..5f3b8b9ff511 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include <linux/list.h> #include <linux/errno.h> @@ -64,7 +40,7 @@ static struct i40e_ops i40e_lan_ops = { /** * i40e_client_get_params - Get the params that can change at runtime * @vsi: the VSI with the message - * @param: clinet param struct + * @params: client param struct * **/ static @@ -590,7 +566,7 @@ static int i40e_client_virtchnl_send(struct i40e_info *ldev, * i40e_client_setup_qvlist * @ldev: pointer to L2 context. * @client: Client pointer. - * @qv_info: queue and vector list + * @qvlist_info: queue and vector list * * Return 0 on success or < 0 on error **/ @@ -665,7 +641,7 @@ err: * i40e_client_request_reset * @ldev: pointer to L2 context. * @client: Client pointer. - * @level: reset level + * @reset_level: reset level **/ static void i40e_client_request_reset(struct i40e_info *ldev, struct i40e_client *client, diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h index 9d464d40bc17..72994baf4941 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.h +++ b/drivers/net/ethernet/intel/i40e/i40e_client.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. 
- * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_CLIENT_H_ #define _I40E_CLIENT_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index c0a3dae8a2db..eb2d1530d331 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40e_type.h" #include "i40e_adminq.h" @@ -1695,6 +1671,8 @@ enum i40e_status_code i40e_aq_set_phy_config(struct i40e_hw *hw, /** * i40e_set_fc * @hw: pointer to the hw struct + * @aq_failures: buffer to return AdminQ failure information + * @atomic_restart: whether to enable atomic link restart * * Set the requested flow control mode using set_phy_config. **/ @@ -2831,8 +2809,8 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid, * @mr_list: list of mirrored VSI SEIDs or VLAN IDs * @cmd_details: pointer to command details structure or NULL * @rule_id: Rule ID returned from FW - * @rule_used: Number of rules used in internal switch - * @rule_free: Number of rules free in internal switch + * @rules_used: Number of rules used in internal switch + * @rules_free: Number of rules free in internal switch * * Add/Delete a mirror rule to a specific switch. Mirror rules are supported for * VEBs/VEPA elements only @@ -2892,8 +2870,8 @@ static i40e_status i40e_mirrorrule_op(struct i40e_hw *hw, * @mr_list: list of mirrored VSI SEIDs or VLAN IDs * @cmd_details: pointer to command details structure or NULL * @rule_id: Rule ID returned from FW - * @rule_used: Number of rules used in internal switch - * @rule_free: Number of rules free in internal switch + * @rules_used: Number of rules used in internal switch + * @rules_free: Number of rules free in internal switch * * Add mirror rule. 
Mirror rules are supported for VEBs or VEPA elements only **/ @@ -2923,8 +2901,8 @@ i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, * add_mirrorrule. * @mr_list: list of mirrored VLAN IDs to be removed * @cmd_details: pointer to command details structure or NULL - * @rule_used: Number of rules used in internal switch - * @rule_free: Number of rules free in internal switch + * @rules_used: Number of rules used in internal switch + * @rules_free: Number of rules free in internal switch * * Delete a mirror rule. Mirror rules are supported for VEBs/VEPA elements only **/ @@ -3672,6 +3650,8 @@ i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent, /** * i40e_aq_start_lldp * @hw: pointer to the hw struct + * @buff: buffer for result + * @buff_size: buffer size * @cmd_details: pointer to command details structure or NULL * * Start the embedded LLDP Agent on all ports. @@ -3752,7 +3732,6 @@ i40e_status i40e_aq_get_cee_dcb_config(struct i40e_hw *hw, * i40e_aq_add_udp_tunnel * @hw: pointer to the hw struct * @udp_port: the UDP port to add in Host byte order - * @header_len: length of the tunneling header length in DWords * @protocol_index: protocol index type * @filter_index: pointer to filter index * @cmd_details: pointer to command details structure or NULL @@ -3971,6 +3950,7 @@ i40e_status i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw, * @hw: pointer to the hw struct * @seid: seid of the switching component connected to Physical Port * @ets_data: Buffer holding ETS parameters + * @opcode: Tx scheduler AQ command opcode * @cmd_details: pointer to command details structure or NULL **/ i40e_status i40e_aq_config_switch_comp_ets(struct i40e_hw *hw, @@ -4314,10 +4294,10 @@ i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw, * @hw: pointer to the hw struct * @seid: VSI seid to add ethertype filter from **/ -#define I40E_FLOW_CONTROL_ETHTYPE 0x8808 void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw, u16 seid) { +#define I40E_FLOW_CONTROL_ETHTYPE 0x8808 u16 flag = I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC | I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP | I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX; @@ -4448,6 +4428,7 @@ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status) * @ret_buff_size: actual buffer size returned * @ret_next_table: next block to read * @ret_next_index: next index to read + * @cmd_details: pointer to command details structure or NULL * * Dump internal FW/HW data for debug purposes. 
* @@ -4574,7 +4555,7 @@ i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw, * i40e_read_phy_register_clause22 * @hw: pointer to the HW structure * @reg: register address in the page - * @phy_adr: PHY address on MDIO interface + * @phy_addr: PHY address on MDIO interface * @value: PHY register value * * Reads specified PHY register value @@ -4619,7 +4600,7 @@ i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw, * i40e_write_phy_register_clause22 * @hw: pointer to the HW structure * @reg: register address in the page - * @phy_adr: PHY address on MDIO interface + * @phy_addr: PHY address on MDIO interface * @value: PHY register value * * Writes specified PHY register value @@ -4660,7 +4641,7 @@ i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw, * @hw: pointer to the HW structure * @page: registers page number * @reg: register address in the page - * @phy_adr: PHY address on MDIO interface + * @phy_addr: PHY address on MDIO interface * @value: PHY register value * * Reads specified PHY register value @@ -4734,7 +4715,7 @@ phy_read_end: * @hw: pointer to the HW structure * @page: registers page number * @reg: register address in the page - * @phy_adr: PHY address on MDIO interface + * @phy_addr: PHY address on MDIO interface * @value: PHY register value * * Writes value to specified PHY register @@ -4801,7 +4782,7 @@ phy_write_end: * @hw: pointer to the HW structure * @page: registers page number * @reg: register address in the page - * @phy_adr: PHY address on MDIO interface + * @phy_addr: PHY address on MDIO interface * @value: PHY register value * * Writes value to specified PHY register @@ -4837,7 +4818,7 @@ i40e_status i40e_write_phy_register(struct i40e_hw *hw, * @hw: pointer to the HW structure * @page: registers page number * @reg: register address in the page - * @phy_adr: PHY address on MDIO interface + * @phy_addr: PHY address on MDIO interface * @value: PHY register value * * Reads specified PHY register value @@ -4872,7 +4853,6 @@ i40e_status i40e_read_phy_register(struct i40e_hw *hw, * i40e_get_phy_address * @hw: pointer to the HW structure * @dev_num: PHY port num that address we want - * @phy_addr: Returned PHY address * * Gets PHY address for current port **/ @@ -5082,7 +5062,9 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, * i40e_led_set_phy * @hw: pointer to the HW structure * @on: true or false + * @led_addr: address of led register to use * @mode: original val plus bit for set or ignore + * * Set led's on or off when controlled by the PHY * **/ @@ -5371,6 +5353,7 @@ i40e_status_code i40e_aq_write_ddp(struct i40e_hw *hw, void *buff, * @hw: pointer to the hw struct * @buff: command buffer (size in bytes = buff_size) * @buff_size: buffer size in bytes + * @flags: AdminQ command flags * @cmd_details: pointer to command details structure or NULL **/ enum diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 9fec728dc4b9..56bff8faf371 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2017 Intel Corporation. 
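The long run of comment-only hunks above (rules_used/rules_free, @phy_addr, @led_addr, @flags and friends) brings the i40e_common.c kernel-doc into agreement with the real function signatures: every @tag must name an actual parameter, and every parameter needs a tag, or scripts/kernel-doc and W=1 builds warn. The expected shape, shown on a made-up prototype:

/**
 * i40e_example_read_phy - read a PHY register over MDIO (illustrative)
 * @hw: pointer to the HW structure
 * @page: registers page number
 * @reg: register address in the page
 * @phy_addr: PHY address on MDIO interface
 * @value: where the register value is stored
 *
 * The tag names must match the parameters exactly; a stale tag such as
 * the old @phy_adr is precisely what the hunks above correct.
 **/
i40e_status i40e_example_read_phy(struct i40e_hw *hw, u8 page, u16 reg,
				  u8 phy_addr, u16 *value);
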
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */

 #include "i40e_adminq.h"
 #include "i40e_prototype.h"
@@ -945,6 +921,70 @@ i40e_status i40e_init_dcb(struct i40e_hw *hw)
 }

 /**
+ * _i40e_read_lldp_cfg - generic read of LLDP Configuration data from NVM
+ * @hw: pointer to the HW structure
+ * @lldp_cfg: pointer to hold lldp configuration variables
+ * @module: address of the module pointer
+ * @word_offset: offset of LLDP configuration
+ *
+ * Reads the LLDP configuration data from NVM using passed addresses
+ **/
+static i40e_status _i40e_read_lldp_cfg(struct i40e_hw *hw,
+				       struct i40e_lldp_variables *lldp_cfg,
+				       u8 module, u32 word_offset)
+{
+	u32 address, offset = (2 * word_offset);
+	i40e_status ret;
+	__le16 raw_mem;
+	u16 mem;
+
+	ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret)
+		return ret;
+
+	ret = i40e_aq_read_nvm(hw, 0x0, module * 2, sizeof(raw_mem), &raw_mem,
+			       true, NULL);
+	i40e_release_nvm(hw);
+	if (ret)
+		return ret;
+
+	mem = le16_to_cpu(raw_mem);
+	/* Check if this pointer needs to be read in word size or 4K sector
+	 * units.
+	 */
+	if (mem & I40E_PTR_TYPE)
+		address = (0x7FFF & mem) * 4096;
+	else
+		address = (0x7FFF & mem) * 2;
+
+	ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret)
+		goto err_lldp_cfg;
+
+	ret = i40e_aq_read_nvm(hw, module, offset, sizeof(raw_mem), &raw_mem,
+			       true, NULL);
+	i40e_release_nvm(hw);
+	if (ret)
+		return ret;
+
+	mem = le16_to_cpu(raw_mem);
+	offset = mem + word_offset;
+	offset *= 2;
+
+	ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret)
+		goto err_lldp_cfg;
+
+	ret = i40e_aq_read_nvm(hw, 0, address + offset,
+			       sizeof(struct i40e_lldp_variables), lldp_cfg,
+			       true, NULL);
+	i40e_release_nvm(hw);
+
+err_lldp_cfg:
+	return ret;
+}
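_i40e_read_lldp_cfg() above leans on an NVM pointer convention that is easy to miss: a pointer word keeps its target in the low 15 bits, and bit 15 (I40E_PTR_TYPE) selects the unit, 4 KB sectors when set, 16-bit words when clear. Note also that the NVM semaphore is acquired and released around each individual i40e_aq_read_nvm() call rather than held across the whole walk. A sketch of the address decoding, using an illustrative constant name that mirrors I40E_PTR_TYPE:

#include <linux/bits.h>
#include <linux/types.h>

#define EXAMPLE_NVM_PTR_4KB BIT(15)	/* mirrors I40E_PTR_TYPE */

static u32 example_nvm_ptr_to_bytes(u16 ptr)
{
	if (ptr & EXAMPLE_NVM_PTR_4KB)
		return (ptr & 0x7FFF) * 4096;	/* 4 KB sector units */

	return (ptr & 0x7FFF) * 2;		/* 16-bit word units */
}
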
+
+/**
  * i40e_read_lldp_cfg - read LLDP Configuration data from NVM
  * @hw: pointer to the HW structure
  * @lldp_cfg: pointer to hold lldp configuration variables
@@ -955,21 +995,34 @@ i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw,
 			       struct i40e_lldp_variables *lldp_cfg)
 {
 	i40e_status ret = 0;
-	u32 offset = (2 * I40E_NVM_LLDP_CFG_PTR);
+	u32 mem;

 	if (!lldp_cfg)
 		return I40E_ERR_PARAM;

 	ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
 	if (ret)
-		goto err_lldp_cfg;
+		return ret;

-	ret = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR, offset,
-			       sizeof(struct i40e_lldp_variables),
-			       (u8 *)lldp_cfg,
-			       true, NULL);
+	ret = i40e_aq_read_nvm(hw, I40E_SR_NVM_CONTROL_WORD, 0, sizeof(mem),
+			       &mem, true, NULL);
 	i40e_release_nvm(hw);
+	if (ret)
+		return ret;
+
+	/* Read a bit that holds information whether we are running flat or
+	 * structured NVM image. Flat image has LLDP configuration in shadow
+	 * ram, so there is a need to pass different addresses for both cases.
+	 */
+	if (mem & I40E_SR_NVM_MAP_STRUCTURE_TYPE) {
+		/* Flat NVM case */
+		ret = _i40e_read_lldp_cfg(hw, lldp_cfg, I40E_SR_EMP_MODULE_PTR,
+					  I40E_SR_LLDP_CFG_PTR);
+	} else {
+		/* Good old structured NVM image */
+		ret = _i40e_read_lldp_cfg(hw, lldp_cfg, I40E_EMP_MODULE_PTR,
+					  I40E_NVM_LLDP_CFG_PTR);
+	}

-err_lldp_cfg:
 	return ret;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
index 4f806386cb22..2b748a60a843 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -1,29 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation.
*/ #ifndef _I40E_DCB_H_ #define _I40E_DCB_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c index 502818e3da78..9deae9a35423 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifdef CONFIG_I40E_DCB #include "i40e.h" @@ -47,7 +23,7 @@ static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay) /** * i40e_dcbnl_ieee_getets - retrieve local IEEE ETS configuration - * @netdev: the corresponding netdev + * @dev: the corresponding netdev * @ets: structure to hold the ETS information * * Returns local IEEE ETS configuration @@ -86,8 +62,8 @@ static int i40e_dcbnl_ieee_getets(struct net_device *dev, /** * i40e_dcbnl_ieee_getpfc - retrieve local IEEE PFC configuration - * @netdev: the corresponding netdev - * @ets: structure to hold the PFC information + * @dev: the corresponding netdev + * @pfc: structure to hold the PFC information * * Returns local IEEE PFC configuration **/ @@ -119,7 +95,7 @@ static int i40e_dcbnl_ieee_getpfc(struct net_device *dev, /** * i40e_dcbnl_getdcbx - retrieve current DCBx capability - * @netdev: the corresponding netdev + * @dev: the corresponding netdev * * Returns DCBx capability features **/ @@ -132,7 +108,8 @@ static u8 i40e_dcbnl_getdcbx(struct net_device *dev) /** * i40e_dcbnl_get_perm_hw_addr - MAC address used by DCBx - * @netdev: the corresponding netdev + * @dev: the corresponding netdev + * @perm_addr: buffer to store the MAC address * * Returns the SAN MAC address used for LLDP exchange **/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d494dcaf18d0..56b911a5dd8b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifdef CONFIG_DEBUG_FS @@ -36,8 +12,8 @@ static struct dentry *i40e_dbg_root; /** * i40e_dbg_find_vsi - searches for the vsi with the given seid - * @pf - the PF structure to search for the vsi - * @seid - seid of the vsi it is searching for + * @pf: the PF structure to search for the vsi + * @seid: seid of the vsi it is searching for **/ static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid) { @@ -55,8 +31,8 @@ static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid) /** * i40e_dbg_find_veb - searches for the veb with the given seid - * @pf - the PF structure to search for the veb - * @seid - seid of the veb it is searching for + * @pf: the PF structure to search for the veb + * @seid: seid of the veb it is searching for **/ static struct i40e_veb *i40e_dbg_find_veb(struct i40e_pf *pf, int seid) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h index ad6a66ccb576..334b05ff685a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_DEVIDS_H_ #define _I40E_DEVIDS_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c index df3e60470f8b..ef4d3762bf37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_diag.c +++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40e_diag.h" #include "i40e_prototype.h" diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h index be8341763475..c3340f320a18 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_diag.h +++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_DIAG_H_ #define _I40E_DIAG_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index b974482ff630..fc6a5eef141c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ /* ethtool support for i40e */ @@ -977,7 +953,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_test_link_mode(ks, advertising, 10000baseCR_Full) || ethtool_link_ksettings_test_link_mode(ks, advertising, - 10000baseSR_Full)) + 10000baseSR_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseLR_Full)) config.link_speed |= I40E_LINK_SPEED_10GB; if (ethtool_link_ksettings_test_link_mode(ks, advertising, 20000baseKR2_Full)) @@ -1079,6 +1057,9 @@ static int i40e_nway_reset(struct net_device *netdev) /** * i40e_get_pauseparam - Get Flow Control status + * @netdev: netdevice structure + * @pause: buffer to return pause parameters + * * Return tx/rx-pause status **/ static void i40e_get_pauseparam(struct net_device *netdev, @@ -2550,7 +2531,7 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd) /** * i40e_check_mask - Check whether a mask field is set * @mask: the full mask value - * @field; mask of the field to check + * @field: mask of the field to check * * If the given mask is fully set, return positive value. If the mask for the * field is fully unset, return zero. Otherwise return a negative error code. @@ -2621,6 +2602,7 @@ static int i40e_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp, /** * i40e_fill_rx_flow_user_data - Fill in user-defined data field * @fsp: pointer to rx_flow specification + * @data: pointer to return userdef data * * Reads the userdef data structure and properly fills in the user defined * fields of the rx_flow_spec. @@ -2799,6 +2781,7 @@ no_input_set: * i40e_get_rxnfc - command to get RX flow classification rules * @netdev: network interface device structure * @cmd: ethtool rxnfc command + * @rule_locs: pointer to store rule data * * Returns Success if the command is supported. 
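+ *
+ * Illustrative mapping, assuming the standard ethtool ioctl numbering:
+ * "ethtool -n <dev> rx-flow-hash tcp4" arrives here as ETHTOOL_GRXFH,
+ * while a rule dump ("ethtool -n <dev>") arrives as ETHTOOL_GRXCLSRLCNT
+ * followed by ETHTOOL_GRXCLSRLALL, the only command that fills
+ * @rule_locs.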
**/ @@ -2840,7 +2823,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, /** * i40e_get_rss_hash_bits - Read RSS Hash bits from register * @nfc: pointer to user request - * @i_setc bits currently set + * @i_setc: bits currently set * * Returns value of bits to be set per user request **/ @@ -2885,7 +2868,7 @@ static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc) /** * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash * @pf: pointer to the physical function struct - * @cmd: ethtool rxnfc command + * @nfc: ethtool rxnfc command * * Returns Success if the flow input set is supported. **/ @@ -3284,7 +3267,7 @@ static int i40e_add_flex_offset(struct list_head *flex_pit_list, * __i40e_reprogram_flex_pit - Re-program specific FLX_PIT table * @pf: Pointer to the PF structure * @flex_pit_list: list of flexible src offsets in use - * #flex_pit_start: index to first entry for this section of the table + * @flex_pit_start: index to first entry for this section of the table * * In order to handle flexible data, the hardware uses a table of values * called the FLX_PIT table. This table is used to indicate which sections of @@ -3398,7 +3381,7 @@ static void i40e_reprogram_flex_pit(struct i40e_pf *pf) /** * i40e_flow_str - Converts a flow_type into a human readable string - * @flow_type: the flow type from a flow specification + * @fsp: the flow specification * * Currently only flow types we support are included here, and the string * value attempts to match what ethtool would use to configure this flow type. @@ -4103,7 +4086,7 @@ static unsigned int i40e_max_channels(struct i40e_vsi *vsi) /** * i40e_get_channels - Get the current channels enabled and max supported etc. - * @netdev: network interface device structure + * @dev: network interface device structure * @ch: ethtool channels structure * * We don't support separate tx and rx queues as channels. The other count @@ -4112,7 +4095,7 @@ static unsigned int i40e_max_channels(struct i40e_vsi *vsi) * q_vectors since we support a lot more queue pairs than q_vectors. **/ static void i40e_get_channels(struct net_device *dev, - struct ethtool_channels *ch) + struct ethtool_channels *ch) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; @@ -4131,14 +4114,14 @@ static void i40e_get_channels(struct net_device *dev, /** * i40e_set_channels - Set the new channels count. - * @netdev: network interface device structure + * @dev: network interface device structure * @ch: ethtool channels structure * * The new channels count may not be the same as requested by the user * since it gets rounded down to a power of 2 value. **/ static int i40e_set_channels(struct net_device *dev, - struct ethtool_channels *ch) + struct ethtool_channels *ch) { const u8 drop = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET; struct i40e_netdev_priv *np = netdev_priv(dev); @@ -4273,6 +4256,7 @@ out: * @netdev: network interface device structure * @indir: indirection table * @key: hash key + * @hfunc: hash function to use * * Returns -EINVAL if the table specifies an invalid queue id, otherwise * returns 0 after programming the table. 
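The i40e_set_channels() kernel-doc above notes that the requested channel count gets rounded down to a power of two before it is applied. A minimal sketch of that rounding, assuming semantics like the kernel's rounddown_pow_of_two() helper rather than the driver's exact code path:

	/* Round n (n >= 1) down to the nearest power of two by clearing
	 * low-order set bits until only the highest one remains.
	 */
	static unsigned int rounddown_pow2(unsigned int n)
	{
		while (n & (n - 1))
			n &= n - 1;
		return n;
	}

Under this sketch a request for 6 channels is applied as 4, while a request for 8 is kept as-is.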
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c index 6d4b590f851b..19ce93d7fd0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c +++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40e_osdep.h" #include "i40e_register.h" @@ -198,7 +174,6 @@ exit: * @hw: pointer to our HW structure * @hmc_info: pointer to the HMC configuration information structure * @idx: the page index - * @is_pf: distinguishes a VF from a PF * * This function: * 1. Marks the entry in pd tabe (for paged address mode) or in sd table diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h index 7b5fd33d70ae..1c78de838857 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h +++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_HMC_H_ #define _I40E_HMC_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c index cd40dc487b38..994011c38fb4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c +++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40e_osdep.h" #include "i40e_register.h" diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h index 79e1396735d9..c46a2c449e60 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h +++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_LAN_HMC_H_ #define _I40E_LAN_HMC_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 16229998fb1e..c8659fbd7111 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include <linux/etherdevice.h> #include <linux/of_net.h> @@ -278,8 +254,8 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) /** * i40e_find_vsi_from_id - searches for the vsi with the given id - * @pf - the pf structure to search for the vsi - * @id - id of the vsi it is searching for + * @pf: the pf structure to search for the vsi + * @id: id of the vsi it is searching for **/ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id) { @@ -435,6 +411,7 @@ static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring, /** * i40e_get_netdev_stats_struct - Get statistics for netdev interface * @netdev: network interface device structure + * @stats: data structure to store statistics * * Returns the address of the device statistics structure. * The statistics are actually updated from the service task. @@ -2027,7 +2004,7 @@ struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next) * from firmware * @count: Number of filters added * @add_list: return data from fw - * @head: pointer to first filter in current batch + * @add_head: pointer to first filter in current batch * * MAC filter entries from list were slated to be added to device. Returns * number of successful filters. Note that 0 does NOT mean success! 
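The batch-add kernel-doc above stresses that the return value is a count of accepted filters, so zero means total failure rather than success. A hypothetical caller-side check under that convention (variable names are illustrative, not the driver's):

	/* 'accepted' is the count returned by the batch add; 'count' is
	 * how many filters were submitted in this batch.
	 */
	if (accepted < count)
		dev_info(&pf->pdev->dev,
			 "only %d of %d MAC filters were committed\n",
			 accepted, count);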
@@ -2134,6 +2111,7 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, /** * i40e_aqc_broadcast_filter - Set promiscuous broadcast flags * @vsi: pointer to the VSI + * @vsi_name: the VSI name * @f: filter data * * This function sets or clears the promiscuous broadcast flags for VLAN @@ -2840,6 +2818,7 @@ void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid) /** * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload * @netdev: network interface to be adjusted + * @proto: unused protocol value * @vid: vlan id to be added * * net_device_ops implementation for adding vlan ids @@ -2864,6 +2843,7 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev, /** * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload * @netdev: network interface to be adjusted + * @proto: unused protocol value * @vid: vlan id to be removed * * net_device_ops implementation for removing vlan ids @@ -3485,7 +3465,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) /** * i40e_enable_misc_int_causes - enable the non-queue interrupts - * @hw: ptr to the hardware info + * @pf: pointer to private device data structure **/ static void i40e_enable_misc_int_causes(struct i40e_pf *pf) { @@ -4255,8 +4235,8 @@ static void i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable) * @is_xdp: true if the queue is used for XDP * @enable: start or stop the queue **/ -static int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, - bool is_xdp, bool enable) +int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, + bool is_xdp, bool enable) { int ret; @@ -4301,7 +4281,6 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) if (ret) break; } - return ret; } @@ -4340,9 +4319,9 @@ static int i40e_pf_rxq_wait(struct i40e_pf *pf, int pf_q, bool enable) * @pf_q: the PF queue to configure * @enable: start or stop the queue * - * This function enables or disables a single queue. Note that any delay - * required after the operation is expected to be handled by the caller of - * this function. + * This function enables or disables a single queue. Note that + * any delay required after the operation is expected to be + * handled by the caller of this function. **/ static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable) { @@ -4372,6 +4351,30 @@ static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable) } /** + * i40e_control_wait_rx_q + * @pf: the PF structure + * @pf_q: queue being configured + * @enable: start or stop the rings + * + * This function enables or disables a single queue along with waiting + * for the change to finish. The caller of this function should handle + * the delays needed in the case of disabling queues. 
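+ *
+ * Typical use, matching i40e_vsi_control_rx() below:
+ *
+ *	ret = i40e_control_wait_rx_q(pf, pf_q, enable);
+ *	if (ret)
+ *		dev_info(&pf->pdev->dev,
+ *			 "VSI seid %d Rx ring %d %sable timeout\n",
+ *			 vsi->seid, pf_q, (enable ? "en" : "dis"));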
+ **/ +int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable) +{ + int ret = 0; + + i40e_control_rx_q(pf, pf_q, enable); + + /* wait for the change to finish */ + ret = i40e_pf_rxq_wait(pf, pf_q, enable); + if (ret) + return ret; + + return ret; +} + +/** * i40e_vsi_control_rx - Start or stop a VSI's rings * @vsi: the VSI being configured * @enable: start or stop the rings @@ -4383,10 +4386,7 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { - i40e_control_rx_q(pf, pf_q, enable); - - /* wait for the change to finish */ - ret = i40e_pf_rxq_wait(pf, pf_q, enable); + ret = i40e_control_wait_rx_q(pf, pf_q, enable); if (ret) { dev_info(&pf->pdev->dev, "VSI seid %d Rx ring %d %sable timeout\n", @@ -5096,7 +5096,7 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi) * i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC * @vsi: the VSI being configured * @enabled_tc: TC bitmap - * @bw_credits: BW shared credits per TC + * @bw_share: BW shared credits per TC * * Returns 0 on success, negative value on failure **/ @@ -6353,6 +6353,7 @@ out: /** * i40e_print_link_message - print link up or down * @vsi: the VSI for which link needs a message + * @isup: true of link is up, false otherwise */ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) { @@ -7212,8 +7213,7 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, if (mask->dst == cpu_to_be32(0xffffffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { - mask->dst = be32_to_cpu(mask->dst); - dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4\n", + dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n", &mask->dst); return I40E_ERR_CONFIG; } @@ -7223,8 +7223,7 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, if (mask->src == cpu_to_be32(0xffffffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { - mask->src = be32_to_cpu(mask->src); - dev_err(&pf->pdev->dev, "Bad ip src mask %pI4\n", + dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n", &mask->src); return I40E_ERR_CONFIG; } @@ -9691,9 +9690,9 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) i40e_flush(hw); } -static const char *i40e_tunnel_name(struct i40e_udp_port_config *port) +static const char *i40e_tunnel_name(u8 type) { - switch (port->type) { + switch (type) { case UDP_TUNNEL_TYPE_VXLAN: return "vxlan"; case UDP_TUNNEL_TYPE_GENEVE: @@ -9727,37 +9726,68 @@ static void i40e_sync_udp_filters(struct i40e_pf *pf) static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - i40e_status ret; + u8 filter_index, type; u16 port; int i; if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state)) return; + /* acquire RTNL to maintain state of flags and port requests */ + rtnl_lock(); + for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { if (pf->pending_udp_bitmap & BIT_ULL(i)) { + struct i40e_udp_port_config *udp_port; + i40e_status ret = 0; + + udp_port = &pf->udp_ports[i]; pf->pending_udp_bitmap &= ~BIT_ULL(i); - port = pf->udp_ports[i].port; + + port = READ_ONCE(udp_port->port); + type = READ_ONCE(udp_port->type); + filter_index = READ_ONCE(udp_port->filter_index); + + /* release RTNL while we wait on AQ command */ + rtnl_unlock(); + if (port) ret = i40e_aq_add_udp_tunnel(hw, port, - pf->udp_ports[i].type, - NULL, NULL); - else - ret = i40e_aq_del_udp_tunnel(hw, i, NULL); + type, + &filter_index, + NULL); + else if (filter_index != I40E_UDP_PORT_INDEX_UNUSED) + ret = i40e_aq_del_udp_tunnel(hw, filter_index, + 
NULL); + + /* reacquire RTNL so we can update filter_index */ + rtnl_lock(); if (ret) { dev_info(&pf->pdev->dev, "%s %s port %d, index %d failed, err %s aq_err %s\n", - i40e_tunnel_name(&pf->udp_ports[i]), + i40e_tunnel_name(type), port ? "add" : "delete", - port, i, + port, + filter_index, i40e_stat_str(&pf->hw, ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - pf->udp_ports[i].port = 0; + if (port) { + /* failed to add, just reset port, + * drop pending bit for any deletion + */ + udp_port->port = 0; + pf->pending_udp_bitmap &= ~BIT_ULL(i); + } + } else if (port) { + /* record filter index on success */ + udp_port->filter_index = filter_index; } } } + + rtnl_unlock(); } /** @@ -10004,7 +10034,7 @@ unlock_pf: /** * i40e_vsi_free_arrays - Free queue and vector pointer arrays for the VSI - * @type: VSI pointer + * @vsi: VSI pointer * @free_qvectors: a bool to specify if q_vectors need to be freed. * * On error: returns error code (negative) @@ -10800,7 +10830,7 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) * @vsi: Pointer to VSI structure * @seed: Buffer to store the keys * @lut: Buffer to store the lookup table entries - * lut_size: Size of buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries * * Returns 0 on success, negative on failure */ @@ -11374,6 +11404,11 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port) u8 i; for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { + /* Do not report ports with pending deletions as + * being available. + */ + if (!port && (pf->pending_udp_bitmap & BIT_ULL(i))) + continue; if (pf->udp_ports[i].port == port) return i; } @@ -11428,6 +11463,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev, /* New port: add it and mark its index in the bitmap */ pf->udp_ports[next_idx].port = port; + pf->udp_ports[next_idx].filter_index = I40E_UDP_PORT_INDEX_UNUSED; pf->pending_udp_bitmap |= BIT_ULL(next_idx); set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); } @@ -11469,7 +11505,12 @@ static void i40e_udp_tunnel_del(struct net_device *netdev, * and make it pending */ pf->udp_ports[idx].port = 0; - pf->pending_udp_bitmap |= BIT_ULL(idx); + + /* Toggle pending bit instead of setting it. This way if we are + * deleting a port that has yet to be added we just clear the pending + * bit and don't have to worry about it. + */ + pf->pending_udp_bitmap ^= BIT_ULL(idx); set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); return; @@ -11500,6 +11541,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev, * @tb: pointer to array of nladdr (unused) * @dev: the net device pointer * @addr: the MAC address entry being added + * @vid: VLAN ID * @flags: instructions from stack about fdb operation */ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], @@ -11545,6 +11587,7 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], * i40e_ndo_bridge_setlink - Set the hardware bridge mode * @dev: the netdev being configured * @nlh: RTNL message + * @flags: bridge flags * * Inserts a new hardware bridge if not already created and * enables the bridging mode requested (VEB or VEPA). If the @@ -14118,6 +14161,7 @@ static void i40e_remove(struct pci_dev *pdev) /** * i40e_pci_error_detected - warning that something funky happened in PCI land * @pdev: PCI device information struct + * @error: the type of PCI error * * Called to warn that something happened and the error handling steps * are in progress. 
Allows the driver to quiesce things, be ready for diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index ba9687c03795..0299e5bbb902 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40e_prototype.h" @@ -1173,6 +1149,7 @@ void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw) * i40e_nvmupd_check_wait_event - handle NVM update operation events * @hw: pointer to the hardware structure * @opcode: the event that just happened + * @desc: AdminQ descriptor **/ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode, struct i40e_aq_desc *desc) diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h index 9c3c3b0d3ac4..a07574bff550 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h +++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_OSDEP_H_ #define _I40E_OSDEP_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 2ec24188d6e2..3170655cdeb9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_PROTOTYPE_H_ #define _I40E_PROTOTYPE_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 5b47dd1f75a5..aa3daec2049d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40e.h" #include <linux/ptp_classify.h> @@ -40,9 +16,9 @@ * At 1Gb link, the period is multiplied by 20. (32ns) * 1588 functionality is not supported at 100Mbps. 
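+ *
+ * Worked numbers, assuming the increment registers hold Q32.32 fixed
+ * point: 0x0199999999 / 2^32 is roughly 1.6 ns, the 40GbE period, and
+ * the multipliers below recover 3.2 ns (x2) for 10GbE and 32 ns (x20)
+ * for 1GbE. The old 1GbE constant (0x2000000000, about 1.4e11) can
+ * overflow a u64 once multiplied by a ppb value near the s32 limit,
+ * which is why adjfreq now scales the 40GbE base first and applies the
+ * link-speed multiplier last.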
*/ -#define I40E_PTP_40GB_INCVAL 0x0199999999ULL -#define I40E_PTP_10GB_INCVAL 0x0333333333ULL -#define I40E_PTP_1GB_INCVAL 0x2000000000ULL +#define I40E_PTP_40GB_INCVAL 0x0199999999ULL +#define I40E_PTP_10GB_INCVAL_MULT 2 +#define I40E_PTP_1GB_INCVAL_MULT 20 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V1 BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT) #define I40E_PRTTSYN_CTL1_TSYNTYPE_V2 (2 << \ @@ -130,17 +106,24 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) ppb = -ppb; } - smp_mb(); /* Force any pending update before accessing. */ - adj = READ_ONCE(pf->ptp_base_adj); - - freq = adj; + freq = I40E_PTP_40GB_INCVAL; freq *= ppb; diff = div_u64(freq, 1000000000ULL); if (neg_adj) - adj -= diff; + adj = I40E_PTP_40GB_INCVAL - diff; else - adj += diff; + adj = I40E_PTP_40GB_INCVAL + diff; + + /* At some link speeds, the base incval is so large that directly + * multiplying by ppb would result in arithmetic overflow even when + * using a u64. Avoid this by instead calculating the new incval + * always in terms of the 40GbE clock rate and then multiplying by the + * link speed factor afterwards. This does result in slightly lower + * precision at lower link speeds, but it is fairly minor. + */ + smp_mb(); /* Force any pending update before accessing. */ + adj *= READ_ONCE(pf->ptp_adj_mult); wr32(hw, I40E_PRTTSYN_INC_L, adj & 0xFFFFFFFF); wr32(hw, I40E_PRTTSYN_INC_H, adj >> 32); @@ -462,6 +445,7 @@ void i40e_ptp_set_increment(struct i40e_pf *pf) struct i40e_link_status *hw_link_info; struct i40e_hw *hw = &pf->hw; u64 incval; + u32 mult; hw_link_info = &hw->phy.link_info; @@ -469,10 +453,10 @@ void i40e_ptp_set_increment(struct i40e_pf *pf) switch (hw_link_info->link_speed) { case I40E_LINK_SPEED_10GB: - incval = I40E_PTP_10GB_INCVAL; + mult = I40E_PTP_10GB_INCVAL_MULT; break; case I40E_LINK_SPEED_1GB: - incval = I40E_PTP_1GB_INCVAL; + mult = I40E_PTP_1GB_INCVAL_MULT; break; case I40E_LINK_SPEED_100MB: { @@ -483,15 +467,20 @@ void i40e_ptp_set_increment(struct i40e_pf *pf) "1588 functionality is not supported at 100 Mbps. Stopping the PHC.\n"); warn_once++; } - incval = 0; + mult = 0; break; } case I40E_LINK_SPEED_40GB: default: - incval = I40E_PTP_40GB_INCVAL; + mult = 1; break; } + /* The increment value is calculated by taking the base 40GbE incvalue + * and multiplying it by a factor based on the link speed. + */ + incval = I40E_PTP_40GB_INCVAL * mult; + /* Write the new increment value into the increment register. The * hardware will not update the clock until both registers have been * written. @@ -500,14 +489,14 @@ void i40e_ptp_set_increment(struct i40e_pf *pf) wr32(hw, I40E_PRTTSYN_INC_H, incval >> 32); /* Update the base adjustement value. */ - WRITE_ONCE(pf->ptp_base_adj, incval); + WRITE_ONCE(pf->ptp_adj_mult, mult); smp_mb(); /* Force the above update. */ } /** * i40e_ptp_get_ts_config - ioctl interface to read the HW timestamping * @pf: Board private structure - * @ifreq: ioctl data + * @ifr: ioctl data * * Obtain the current hardware timestamping settigs as requested. To do this, * keep a shadow copy of the timestamp settings rather than attempting to @@ -651,7 +640,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, /** * i40e_ptp_set_ts_config - ioctl interface to control the HW timestamping * @pf: Board private structure - * @ifreq: ioctl data + * @ifr: ioctl data * * Respond to the user filter requests and make the appropriate hardware * changes here. 
The XL710 cannot support splitting of the Tx/Rx timestamping diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index b3e206e49cc2..52e3680c57f8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_REGISTER_H_ #define _I40E_REGISTER_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_status.h b/drivers/net/ethernet/intel/i40e/i40e_status.h index 10c86f63dc52..77be0702d07c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_status.h +++ b/drivers/net/ethernet/intel/i40e/i40e_status.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_STATUS_H_ #define _I40E_STATUS_H_ diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h index 410ba13bcf21..424f02077e2e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_trace.h +++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel(R) 40-10 Gigabit Ethernet Connection Network Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ /* Modeled on trace-events-sample.h */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f174c72480ab..5efa68de935b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #include <linux/prefetch.h> #include <net/busy_poll.h> @@ -495,7 +471,7 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi, /** * i40e_add_del_fdir - Build raw packets to add/del fdir filter * @vsi: pointer to the targeted VSI - * @cmd: command to get or set RX flow classification rules + * @input: filter to add or delete * @add: true adds a filter, false removes it * **/ @@ -638,7 +614,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) kfree(tx_buffer->raw_buf); else if (ring_is_xdp(ring)) - page_frag_free(tx_buffer->raw_buf); + xdp_return_frame(tx_buffer->xdpf); else dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) @@ -713,7 +689,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) /** * i40e_get_tx_pending - how many tx descriptors not processed - * @tx_ring: the ring of descriptors + * @ring: the ring of descriptors * @in_sw: use SW variables * * Since there is no access to the ring head register @@ -841,7 +817,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, /* free the skb/XDP data */ if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buf->raw_buf); + xdp_return_frame(tx_buf->xdpf); else napi_consume_skb(tx_buf->skb, napi_budget); @@ -1795,6 +1771,8 @@ static inline int i40e_ptype_to_htype(u8 ptype) * i40e_rx_hash - set the hash value in the skb * @ring: descriptor ring * @rx_desc: specific descriptor + * @skb: skb currently being received and modified + * @rx_ptype: Rx packet type **/ static inline void i40e_rx_hash(struct i40e_ring *ring, union i40e_rx_desc *rx_desc, @@ -2203,9 +2181,20 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, #define I40E_XDP_CONSUMED 1 #define I40E_XDP_TX 2 -static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, +static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, struct i40e_ring *xdp_ring); +static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, + struct i40e_ring *xdp_ring) +{ + struct xdp_frame *xdpf = convert_to_xdp_frame(xdp); + + if (unlikely(!xdpf)) + return I40E_XDP_CONSUMED; + + return i40e_xmit_xdp_ring(xdpf, xdp_ring); +} + /** * i40e_run_xdp - run an XDP program * @rx_ring: Rx ring being processed @@ -2225,13 +2214,15 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, if (!xdp_prog) goto xdp_out; + prefetchw(xdp->data_hard_start); /* xdp_frame write */ + act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: break; case XDP_TX: xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; - result = i40e_xmit_xdp_ring(xdp, xdp_ring); + result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring); break; case XDP_REDIRECT: err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); @@ -3478,13 +3469,13 @@ dma_error: * @xdp: data to transmit * @xdp_ring: XDP Tx ring **/ -static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, +static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, struct i40e_ring *xdp_ring) { - u32 size = xdp->data_end - xdp->data; u16 i = xdp_ring->next_to_use; struct i40e_tx_buffer *tx_bi; struct i40e_tx_desc *tx_desc; + u32 size = xdpf->len; dma_addr_t dma; if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) { @@ -3492,14 +3483,14 @@ static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, return I40E_XDP_CONSUMED; } - dma = dma_map_single(xdp_ring->dev, xdp->data, size, DMA_TO_DEVICE); + dma = dma_map_single(xdp_ring->dev, xdpf->data, size, DMA_TO_DEVICE); if (dma_mapping_error(xdp_ring->dev, dma)) return I40E_XDP_CONSUMED; tx_bi = &xdp_ring->tx_bi[i]; tx_bi->bytecount = size; tx_bi->gso_segs = 1; - tx_bi->raw_buf = 
xdp->data; + tx_bi->xdpf = xdpf; /* record length, and DMA address */ dma_unmap_len_set(tx_bi, len, size); @@ -3675,7 +3666,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) * * Returns Zero if sent, else an error code **/ -int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) { struct i40e_netdev_priv *np = netdev_priv(dev); unsigned int queue_index = smp_processor_id(); @@ -3688,7 +3679,7 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs) return -ENXIO; - err = i40e_xmit_xdp_ring(xdp, vsi->xdp_rings[queue_index]); + err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]); if (err != I40E_XDP_TX) return -ENOSPC; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 3043483ec426..fdd2c55f03a6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_TXRX_H_ #define _I40E_TXRX_H_ @@ -306,6 +282,7 @@ static inline unsigned int i40e_txd_use_count(unsigned int size) struct i40e_tx_buffer { struct i40e_tx_desc *next_to_watch; union { + struct xdp_frame *xdpf; struct sk_buff *skb; void *raw_buf; }; @@ -510,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); void i40e_detect_recover_hung(struct i40e_vsi *vsi); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); -int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp); +int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf); void i40e_xdp_flush(struct net_device *dev); /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index bfb80092b352..7df969c59855 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_TYPE_H_ #define _I40E_TYPE_H_ @@ -1318,7 +1294,8 @@ struct i40e_hw_port_stats { /* Checksum and Shadow RAM pointers */ #define I40E_SR_NVM_CONTROL_WORD 0x00 -#define I40E_SR_EMP_MODULE_PTR 0x0F +#define I40E_EMP_MODULE_PTR 0x0F +#define I40E_SR_EMP_MODULE_PTR 0x48 #define I40E_SR_PBA_FLAGS 0x15 #define I40E_SR_PBA_BLOCK_PTR 0x16 #define I40E_SR_BOOT_CONFIG_PTR 0x17 @@ -1337,6 +1314,8 @@ struct i40e_hw_port_stats { #define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE 1024 #define I40E_SR_CONTROL_WORD_1_SHIFT 0x06 #define I40E_SR_CONTROL_WORD_1_MASK (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT) +#define I40E_SR_CONTROL_WORD_1_NVM_BANK_VALID BIT(5) +#define I40E_SR_NVM_MAP_STRUCTURE_TYPE BIT(12) #define I40E_PTR_TYPE BIT(15) #define I40E_SR_OCP_CFG_WORD0 0x2B #define I40E_SR_OCP_ENABLED BIT(15) @@ -1454,7 +1433,8 @@ enum i40e_reset_type { }; /* IEEE 802.1AB LLDP Agent Variables from NVM */ -#define I40E_NVM_LLDP_CFG_PTR 0xD +#define I40E_NVM_LLDP_CFG_PTR 0x06 +#define I40E_SR_LLDP_CFG_PTR 0x31 struct i40e_lldp_variables { u16 length; u16 adminstatus; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 35173cbe80f7..c6d24eaede18 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40e.h" @@ -32,8 +8,8 @@ /** * i40e_vc_vf_broadcast * @pf: pointer to the PF structure - * @opcode: operation code - * @retval: return value + * @v_opcode: operation code + * @v_retval: return value * @msg: pointer to the msg buffer * @msglen: msg length * @@ -1663,6 +1639,7 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf, /** * i40e_vc_get_version_msg * @vf: pointer to the VF info + * @msg: pointer to the msg buffer * * called from the VF to request the API version used by the PF **/ @@ -1706,7 +1683,6 @@ static void i40e_del_qch(struct i40e_vf *vf) * i40e_vc_get_vf_resources_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * called from the VF to request its resources **/ @@ -1830,8 +1806,6 @@ err: /** * i40e_vc_reset_vf_msg * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * @msglen: msg length * * called from the VF to reset itself, * unlike other virtchnl messages, PF driver @@ -2180,6 +2154,51 @@ error_param: } /** + * i40e_ctrl_vf_tx_rings + * @vsi: the SRIOV VSI being configured + * @q_map: bit map of the queues to be enabled + * @enable: start or stop the queue + **/ +static int i40e_ctrl_vf_tx_rings(struct i40e_vsi *vsi, unsigned long q_map, + bool enable) +{ + struct i40e_pf *pf = vsi->back; + int ret = 0; + u16 q_id; + + for_each_set_bit(q_id, &q_map, I40E_MAX_VF_QUEUES) { + ret = i40e_control_wait_tx_q(vsi->seid, pf, + vsi->base_queue + q_id, + false /*is xdp*/, enable); + if (ret) + break; + } + return ret; +} + +/** + * i40e_ctrl_vf_rx_rings + * @vsi: the SRIOV VSI being configured + * @q_map: bit map of the queues to be enabled + * @enable: start or stop the queue + **/ +static int i40e_ctrl_vf_rx_rings(struct i40e_vsi *vsi, unsigned long q_map, + bool enable) +{ + struct i40e_pf *pf = vsi->back; + int ret = 0; + u16 q_id; + + for_each_set_bit(q_id, &q_map, I40E_MAX_VF_QUEUES) { + ret = i40e_control_wait_rx_q(pf, vsi->base_queue + q_id, + enable); + if (ret) + break; + } + return ret; +} + +/** * i40e_vc_enable_queues_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2211,8 +2230,17 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } - if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx])) + /* Use the queue bit map sent by the VF */ + if (i40e_ctrl_vf_rx_rings(pf->vsi[vf->lan_vsi_idx], vqs->rx_queues, + true)) { aq_ret = I40E_ERR_TIMEOUT; + goto error_param; + } + if (i40e_ctrl_vf_tx_rings(pf->vsi[vf->lan_vsi_idx], vqs->tx_queues, + true)) { + aq_ret = I40E_ERR_TIMEOUT; + goto error_param; + } /* need to start the rings for additional ADq VSI's as well */ if (vf->adq_enabled) { @@ -2260,8 +2288,17 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } - i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]); - + /* Use the queue bit map sent by the VF */ + if (i40e_ctrl_vf_tx_rings(pf->vsi[vf->lan_vsi_idx], vqs->tx_queues, + false)) { + aq_ret = I40E_ERR_TIMEOUT; + goto error_param; + } + if (i40e_ctrl_vf_rx_rings(pf->vsi[vf->lan_vsi_idx], vqs->rx_queues, + false)) { + aq_ret = I40E_ERR_TIMEOUT; + goto error_param; + } error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, @@ -3556,15 +3593,16 @@ err: * i40e_vc_process_vf_msg * @pf: pointer to the PF 
structure * @vf_id: source VF id + * @v_opcode: operation code + * @v_retval: unused return value code * @msg: pointer to the msg buffer * @msglen: msg length - * @msghndl: msg handle * * called from the common aeq/arq handler to * process request from VF **/ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, - u32 v_retval, u8 *msg, u16 msglen) + u32 __always_unused v_retval, u8 *msg, u16 msglen) { struct i40e_hw *hw = &pf->hw; int local_vf_id = vf_id - (s16)hw->func_caps.vf_base_id; @@ -4015,7 +4053,8 @@ error_pvid: * i40e_ndo_set_vf_bw * @netdev: network interface device structure * @vf_id: VF identifier - * @tx_rate: Tx rate + * @min_tx_rate: Minimum Tx rate + * @max_tx_rate: Maximum Tx rate * * configure VF Tx rate **/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 57f727bb9e36..bf67d62e2b5f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_VIRTCHNL_PF_H_ #define _I40E_VIRTCHNL_PF_H_ diff --git a/drivers/net/ethernet/intel/i40evf/Makefile b/drivers/net/ethernet/intel/i40evf/Makefile index 1e89c5487676..3c5c6e962280 100644 --- a/drivers/net/ethernet/intel/i40evf/Makefile +++ b/drivers/net/ethernet/intel/i40evf/Makefile @@ -1,29 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel Ethernet Controller XL710 Family Linux Virtual Function Driver -# Copyright(c) 2013 - 2014 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". 
-# -# Contact Information: -# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> -# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 -# -################################################################################ +# Copyright(c) 2013 - 2018 Intel Corporation. # ## Makefile for the Intel(R) 40GbE VF driver diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index 6fd677efa9da..c355120dfdfd 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40e_status.h" #include "i40e_type.h" diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h index a7137c165256..1f264b9b6805 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_ADMINQ_H_ #define _I40E_ADMINQ_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 439e71882049..aa81e87cd471 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_ADMINQ_CMD_H_ #define _I40E_ADMINQ_CMD_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h index 7e0fddd8af36..cb8689222c8b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_ALLOC_H_ #define _I40E_ALLOC_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 67140cdbcd7a..9cef54971312 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40e_type.h" #include "i40e_adminq.h" @@ -1255,6 +1231,7 @@ i40e_status_code i40evf_aq_write_ddp(struct i40e_hw *hw, void *buff, * @hw: pointer to the hw struct * @buff: command buffer (size in bytes = buff_size) * @buff_size: buffer size in bytes + * @flags: AdminQ command flags * @cmd_details: pointer to command details structure or NULL **/ enum diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h index 352dd3f3eb6a..f300bf271824 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_DEVIDS_H_ #define _I40E_DEVIDS_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h index 7432596164f4..1c78de838857 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_HMC_H_ #define _I40E_HMC_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h index ddac0e4908d3..82b00f70a632 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_LAN_HMC_H_ #define _I40E_LAN_HMC_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h index 8668ad6c1a65..3ddddb46455b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_OSDEP_H_ #define _I40E_OSDEP_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h index 72501bd0f1a9..a358f4b9d5aa 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_PROTOTYPE_H_ #define _I40E_PROTOTYPE_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_register.h b/drivers/net/ethernet/intel/i40evf/i40e_register.h index c9c935659758..49e1f57d99cc 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_register.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_register.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_REGISTER_H_ #define _I40E_REGISTER_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_status.h b/drivers/net/ethernet/intel/i40evf/i40e_status.h index 0d7993ecb99a..77be0702d07c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_status.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_status.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #ifndef _I40E_STATUS_H_ #define _I40E_STATUS_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_trace.h b/drivers/net/ethernet/intel/i40evf/i40e_trace.h index ece01dd12a3c..d7a4e68820a8 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_trace.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_trace.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel(R) 40-10 Gigabit Ethernet Virtual Function Driver - * Copyright(c) 2013 - 2017 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ /* Modeled on trace-events-sample.h */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 12bd937861e7..a9730711e257 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #include <linux/prefetch.h> #include <net/busy_poll.h> @@ -129,7 +105,7 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) /** * i40evf_get_tx_pending - how many Tx descriptors not processed - * @tx_ring: the ring of descriptors + * @ring: the ring of descriptors * @in_sw: is tx_pending being checked in SW or HW * * Since there is no access to the ring head register @@ -1070,6 +1046,8 @@ static inline int i40e_ptype_to_htype(u8 ptype) * i40e_rx_hash - set the hash value in the skb * @ring: descriptor ring * @rx_desc: specific descriptor + * @skb: skb currently being received and modified + * @rx_ptype: Rx packet type **/ static inline void i40e_rx_hash(struct i40e_ring *ring, union i40e_rx_desc *rx_desc, diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 5790897eae2e..3b5a63b3236e 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_TXRX_H_ #define _I40E_TXRX_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 449de4b0058e..094387db3c11 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". 
- * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40E_TYPE_H_ #define _I40E_TYPE_H_ @@ -1257,7 +1233,8 @@ struct i40e_hw_port_stats { /* Checksum and Shadow RAM pointers */ #define I40E_SR_NVM_CONTROL_WORD 0x00 -#define I40E_SR_EMP_MODULE_PTR 0x0F +#define I40E_EMP_MODULE_PTR 0x0F +#define I40E_SR_EMP_MODULE_PTR 0x48 #define I40E_NVM_OEM_VER_OFF 0x83 #define I40E_SR_NVM_DEV_STARTER_VERSION 0x18 #define I40E_SR_NVM_WAKE_ON_LAN 0x19 @@ -1273,6 +1250,9 @@ struct i40e_hw_port_stats { #define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE 1024 #define I40E_SR_CONTROL_WORD_1_SHIFT 0x06 #define I40E_SR_CONTROL_WORD_1_MASK (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT) +#define I40E_SR_CONTROL_WORD_1_NVM_BANK_VALID BIT(5) +#define I40E_SR_NVM_MAP_STRUCTURE_TYPE BIT(12) +#define I40E_PTR_TYPE BIT(15) /* Shadow RAM related */ #define I40E_SR_SECTOR_SIZE_IN_WORDS 0x800 @@ -1386,6 +1366,10 @@ enum i40e_reset_type { I40E_RESET_EMPR = 3, }; +/* IEEE 802.1AB LLDP Agent Variables from NVM */ +#define I40E_NVM_LLDP_CFG_PTR 0x06 +#define I40E_SR_LLDP_CFG_PTR 0x31 + /* RSS Hash Table Size */ #define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000 diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 3a7a1e77bf39..98b834932dd3 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #ifndef _I40EVF_H_ #define _I40EVF_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.c b/drivers/net/ethernet/intel/i40evf/i40evf_client.c index da60ce12b33d..3cc9d60d0d72 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_client.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_client.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ + #include <linux/list.h> #include <linux/errno.h> @@ -176,7 +178,6 @@ void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset) /** * i40evf_client_add_instance - add a client instance to the instance list * @adapter: pointer to the board struct - * @client: pointer to a client struct in the client list. * * Returns cinst ptr on success, NULL on failure **/ @@ -234,7 +235,6 @@ out: /** * i40evf_client_del_instance - removes a client instance from the list * @adapter: pointer to the board struct - * @client: pointer to the client struct * **/ static @@ -438,7 +438,7 @@ static u32 i40evf_client_virtchnl_send(struct i40e_info *ldev, * i40evf_client_setup_qvlist - send a message to the PF to setup iwarp qv map * @ldev: pointer to L2 context. * @client: Client pointer. - * @qv_info: queue and vector list + * @qvlist_info: queue and vector list * * Return 0 on success or < 0 on error **/ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.h b/drivers/net/ethernet/intel/i40evf/i40evf_client.h index 15a10da5bd4a..fc6592c3de9c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_client.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf_client.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ + #ifndef _I40E_CLIENT_H_ #define _I40E_CLIENT_H_ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index dc4cde274fb8..69efe0aec76a 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ /* ethtool support for i40evf */ #include "i40evf.h" @@ -226,7 +202,7 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data) /** * i40evf_get_priv_flags - report device private flags - * @dev: network interface device structure + * @netdev: network interface device structure * * The get string set count and the string set should be matched for each * flag returned. 
Add new strings for each flag to the i40e_gstrings_priv_flags @@ -253,7 +229,7 @@ static u32 i40evf_get_priv_flags(struct net_device *netdev) /** * i40evf_set_priv_flags - set private flags - * @dev: network interface device structure + * @netdev: network interface device structure * @flags: bit flags to be set **/ static int i40evf_set_priv_flags(struct net_device *netdev, u32 flags) @@ -627,6 +603,7 @@ static int i40evf_set_per_queue_coalesce(struct net_device *netdev, * i40evf_get_rxnfc - command to get RX flow classification rules * @netdev: network interface device structure * @cmd: ethtool rxnfc command + * @rule_locs: pointer to store rule locations * * Returns Success if the command is supported. **/ @@ -746,6 +723,7 @@ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev) * @netdev: network interface device structure * @indir: indirection table * @key: hash key + * @hfunc: hash function in use * * Reads the indirection table directly from the hardware. Always returns 0. **/ @@ -774,6 +752,7 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, * @netdev: network interface device structure * @indir: indirection table * @key: hash key + * @hfunc: hash function to use * * Returns -EINVAL if the table specifies an invalid queue id, otherwise * returns 0 after programming the table. diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 5f71532be7f1..3f04a182903d 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40evf.h" #include "i40e_prototype.h" @@ -473,6 +449,7 @@ static void i40evf_irq_affinity_release(struct kref *ref) {} /** * i40evf_request_traffic_irqs - Initialize MSI-X interrupts * @adapter: board private structure + * @basename: device basename * * Allocates MSI-X vectors for tx and rx handling, and requests * interrupts from the kernel. 
@@ -705,7 +682,7 @@ i40evf_vlan_filter *i40evf_add_vlan(struct i40evf_adapter *adapter, u16 vlan) f = i40evf_find_vlan(adapter, vlan); if (!f) { - f = kzalloc(sizeof(*f), GFP_ATOMIC); + f = kzalloc(sizeof(*f), GFP_KERNEL); if (!f) goto clearout; @@ -745,6 +722,7 @@ static void i40evf_del_vlan(struct i40evf_adapter *adapter, u16 vlan) /** * i40evf_vlan_rx_add_vid - Add a VLAN filter to a device * @netdev: network device struct + * @proto: unused protocol data * @vid: VLAN tag **/ static int i40evf_vlan_rx_add_vid(struct net_device *netdev, @@ -762,6 +740,7 @@ static int i40evf_vlan_rx_add_vid(struct net_device *netdev, /** * i40evf_vlan_rx_kill_vid - Remove a VLAN filter from a device * @netdev: network device struct + * @proto: unused protocol data * @vid: VLAN tag **/ static int i40evf_vlan_rx_kill_vid(struct net_device *netdev, @@ -3160,7 +3139,7 @@ static int i40evf_set_features(struct net_device *netdev, /** * i40evf_features_check - Validate encapsulated packet conforms to limits * @skb: skb buff - * @netdev: This physical port's netdev + * @dev: This physical port's netdev * @features: Offload features that the stack believes apply **/ static netdev_features_t i40evf_features_check(struct sk_buff *skb, @@ -3378,6 +3357,24 @@ int i40evf_process_config(struct i40evf_adapter *adapter) if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + /* Do not turn on offloads when they are requested to be turned off. + * TSO needs minimum 576 bytes to work correctly. + */ + if (netdev->wanted_features) { + if (!(netdev->wanted_features & NETIF_F_TSO) || + netdev->mtu < 576) + netdev->features &= ~NETIF_F_TSO; + if (!(netdev->wanted_features & NETIF_F_TSO6) || + netdev->mtu < 576) + netdev->features &= ~NETIF_F_TSO6; + if (!(netdev->wanted_features & NETIF_F_TSO_ECN)) + netdev->features &= ~NETIF_F_TSO_ECN; + if (!(netdev->wanted_features & NETIF_F_GRO)) + netdev->features &= ~NETIF_F_GRO; + if (!(netdev->wanted_features & NETIF_F_GSO)) + netdev->features &= ~NETIF_F_GSO; + } + adapter->vsi.id = adapter->vsi_res->vsi_id; adapter->vsi.back = adapter; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 26a59890532f..565677de5ba3 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "i40evf.h" #include "i40e_prototype.h" @@ -179,8 +155,7 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter) /** * i40evf_get_vf_config - * @hw: pointer to the hardware structure - * @len: length of buffer + * @adapter: private adapter structure * * Get VF configuration from PF and populate hw structure. Must be called after * admin queue is initialized. Busy waits until response is received from PF, @@ -423,8 +398,6 @@ int i40evf_request_queues(struct i40evf_adapter *adapter, int num) /** * i40evf_add_ether_addrs * @adapter: adapter structure - * @addrs: the MAC address filters to add (contiguous) - * @count: number of filters * * Request that the PF add one or more addresses to our filters. **/ @@ -497,8 +470,6 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) /** * i40evf_del_ether_addrs * @adapter: adapter structure - * @addrs: the MAC address filters to remove (contiguous) - * @count: number of filtes * * Request that the PF remove one or more addresses from our filters. **/ @@ -571,8 +542,6 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) /** * i40evf_add_vlans * @adapter: adapter structure - * @vlans: the VLANs to add - * @count: number of VLANs * * Request that the PF add one or more VLAN filters to our VSI. **/ @@ -643,8 +612,6 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) /** * i40evf_del_vlans * @adapter: adapter structure - * @vlans: the VLANs to remove - * @count: number of VLANs * * Request that the PF remove one or more VLAN filters from our VSI. **/ diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile index c48583e98ac1..394c1e0656b9 100644 --- a/drivers/net/ethernet/intel/igb/Makefile +++ b/drivers/net/ethernet/intel/igb/Makefile @@ -1,31 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel 82575 PCI-Express Ethernet Linux driver -# Copyright(c) 1999 - 2014 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# this program; if not, see <http://www.gnu.org/licenses/>. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Contact Information: -# Linux NICS <linux.nics@intel.com> -# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> -# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 -# -################################################################################ - +# Copyright(c) 1999 - 2018 Intel Corporation. 
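Stepping back to the i40evf_process_config() hunk above: because the VF rebuilds its default feature set whenever it processes a fresh PF configuration, it now masks features back off when netdev->wanted_features says user space disabled them, and keeps TSO off below the 576-byte MTU it needs to work correctly. The check in isolation, as a sketch (respect_wanted_features() is an invented name; only TSO and GRO are shown, the hunk handles TSO6, TSO_ECN and GSO the same way):

#include <linux/netdevice.h>

/* Re-apply user intent after a driver rebuilds netdev->features. */
static void respect_wanted_features(struct net_device *netdev)
{
	if (!netdev->wanted_features)
		return;		/* nothing has been requested off yet */

	if (!(netdev->wanted_features & NETIF_F_TSO) || netdev->mtu < 576)
		netdev->features &= ~NETIF_F_TSO;
	if (!(netdev->wanted_features & NETIF_F_GRO))
		netdev->features &= ~NETIF_F_GRO;
}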
# # Makefile for the Intel(R) 82575 PCI-Express ethernet driver # diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index dd9b6cac220d..b13b42e5a1d9 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -1,26 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ /* e1000_82575 * e1000_82576 diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index e53ebe97d709..6ad775b1a4c5 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #ifndef _E1000_82575_H_ #define _E1000_82575_H_ diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 98534f765e0e..252440a418dc 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #ifndef _E1000_DEFINES_H_ #define _E1000_DEFINES_H_ @@ -491,6 +470,8 @@ * manageability enabled, allowing us room for 15 multicast addresses. */ #define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ +#define E1000_RAH_ASEL_SRC_ADDR 0x00010000 +#define E1000_RAH_QSEL_ENABLE 0x10000000 #define E1000_RAL_MAC_ADDR_LEN 4 #define E1000_RAH_MAC_ADDR_LEN 2 #define E1000_RAH_POOL_MASK 0x03FC0000 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index ff835e1e853d..5d87957b2627 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -1,25 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #ifndef _E1000_HW_H_ #define _E1000_HW_H_ diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 6f548247e6d8..c54ebedca6da 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -1,26 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". 
- * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ /* e1000_i210 * e1000_i211 diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h index 56f015ccb206..5c437fdc49ee 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.h +++ b/drivers/net/ethernet/intel/igb/e1000_i210.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #ifndef _E1000_I210_H_ #define _E1000_I210_H_ diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 298afa0d9159..79ee0a747260 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -1,26 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #include <linux/if_ether.h> #include <linux/delay.h> diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h index 04d80c765aee..6e110f28f922 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.h +++ b/drivers/net/ethernet/intel/igb/e1000_mac.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #ifndef _E1000_MAC_H_ #define _E1000_MAC_H_ diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c index ef42f1689b3b..46debd991bfe 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.c +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c @@ -1,26 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #include "e1000_mbx.h" diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h index 4f0ecd28354d..178e60ec71d4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.h +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". 
- * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #ifndef _E1000_MBX_H_ #define _E1000_MBX_H_ diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c index e4596f151cd4..09f4dcb09632 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.c +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c @@ -1,25 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #include <linux/if_ether.h> #include <linux/delay.h> diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h index dde68cd54a53..091cddf4ada8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.h +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #ifndef _E1000_NVM_H_ #define _E1000_NVM_H_ diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 4ec61243da82..2be0e762ec69 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -1,26 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2015 Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #include <linux/if_ether.h> #include <linux/delay.h> diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h index 856d2cda0643..5894e4b1d0a8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.h +++ b/drivers/net/ethernet/intel/igb/e1000_phy.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #ifndef _E1000_PHY_H_ #define _E1000_PHY_H_ diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index e8fa8c6530e0..0ad737d2f289 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". 
- * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #ifndef _E1000_REGS_H_ #define _E1000_REGS_H_ diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 8dbc399b345e..9643b5b3d444 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -1,26 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ /* Linux PRO/1000 Ethernet Driver main header file */ @@ -442,6 +421,8 @@ struct hwmon_buff { enum igb_filter_match_flags { IGB_FILTER_FLAG_ETHER_TYPE = 0x1, IGB_FILTER_FLAG_VLAN_TCI = 0x2, + IGB_FILTER_FLAG_SRC_MAC_ADDR = 0x4, + IGB_FILTER_FLAG_DST_MAC_ADDR = 0x8, }; #define IGB_MAX_RXNFC_FILTERS 16 @@ -456,11 +437,14 @@ struct igb_nfc_input { u8 match_flags; __be16 etype; __be16 vlan_tci; + u8 src_addr[ETH_ALEN]; + u8 dst_addr[ETH_ALEN]; }; struct igb_nfc_filter { struct hlist_node nfc_node; struct igb_nfc_input filter; + unsigned long cookie; u16 etype_reg_index; u16 sw_idx; u16 action; @@ -474,6 +458,8 @@ struct igb_mac_addr { #define IGB_MAC_STATE_DEFAULT 0x1 #define IGB_MAC_STATE_IN_USE 0x2 +#define IGB_MAC_STATE_SRC_ADDR 0x4 +#define IGB_MAC_STATE_QUEUE_STEERING 0x8 /* board specific private data structure */ struct igb_adapter { @@ -598,6 +584,7 @@ struct igb_adapter { /* RX network flow classification support */ struct hlist_head nfc_filter_list; + struct hlist_head cls_flower_list; unsigned int nfc_filter_count; /* lock for RX network flow classification filter */ spinlock_t nfc_lock; @@ -739,4 +726,9 @@ int igb_add_filter(struct igb_adapter *adapter, int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input); +int igb_add_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags); +int igb_del_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags); + #endif /* _IGB_H_ */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index e77ba0d5866d..2d798499d35e 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1,26 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. 
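A side note on the flag layout added to igb.h above: match_flags is a plain bitmask, so one rule can combine EtherType, VLAN-priority and MAC matches in a single igb_nfc_filter. A minimal standalone sketch of the flag arithmetic, with the values copied from the hunk above (the printf output is illustrative only, not driver behaviour):

#include <stdio.h>

/* Values mirror the additions to drivers/net/ethernet/intel/igb/igb.h */
#define IGB_FILTER_FLAG_ETHER_TYPE	0x1
#define IGB_FILTER_FLAG_VLAN_TCI	0x2
#define IGB_FILTER_FLAG_SRC_MAC_ADDR	0x4
#define IGB_FILTER_FLAG_DST_MAC_ADDR	0x8

int main(void)
{
	unsigned char match_flags = 0;

	/* a rule matching destination MAC plus VLAN priority */
	match_flags |= IGB_FILTER_FLAG_DST_MAC_ADDR;
	match_flags |= IGB_FILTER_FLAG_VLAN_TCI;

	/* igb_add_filter() (further down) rejects rules on i210 that
	 * set nothing besides the source address:
	 */
	if (!(match_flags & ~IGB_FILTER_FLAG_SRC_MAC_ADDR))
		printf("rejected on i210 (-EOPNOTSUPP)\n");
	else
		printf("accepted, match_flags = 0x%x\n", match_flags);
	return 0;
}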
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ /* ethtool support for igb */ @@ -2495,6 +2474,23 @@ static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, fsp->h_ext.vlan_tci = rule->filter.vlan_tci; fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK); } + if (rule->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) { + ether_addr_copy(fsp->h_u.ether_spec.h_dest, + rule->filter.dst_addr); + /* As we only support matching by the full + * mask, return the mask to userspace + */ + eth_broadcast_addr(fsp->m_u.ether_spec.h_dest); + } + if (rule->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) { + ether_addr_copy(fsp->h_u.ether_spec.h_source, + rule->filter.src_addr); + /* As we only support matching by the full + * mask, return the mask to userspace + */ + eth_broadcast_addr(fsp->m_u.ether_spec.h_source); + } + return 0; } return -EINVAL; @@ -2768,14 +2764,41 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) { + struct e1000_hw *hw = &adapter->hw; int err = -EINVAL; + if (hw->mac.type == e1000_i210 && + !(input->filter.match_flags & ~IGB_FILTER_FLAG_SRC_MAC_ADDR)) { + dev_err(&adapter->pdev->dev, + "i210 doesn't support flow classification rules specifying only source addresses.\n"); + return -EOPNOTSUPP; + } + if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) { err = igb_rxnfc_write_etype_filter(adapter, input); if (err) return err; } + if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) { + err = igb_add_mac_steering_filter(adapter, + input->filter.dst_addr, + input->action, 0); + err = min_t(int, err, 0); + if (err) + return err; + } + + if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) { + err = igb_add_mac_steering_filter(adapter, + input->filter.src_addr, + input->action, + IGB_MAC_STATE_SRC_ADDR); + err = min_t(int, err, 0); + if (err) + return err; + } + if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) err = igb_rxnfc_write_vlan_prio_filter(adapter, input); @@ -2824,6 +2847,15 @@ int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) igb_clear_vlan_prio_filter(adapter, ntohs(input->filter.vlan_tci)); + if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) + igb_del_mac_steering_filter(adapter, input->filter.src_addr, + input->action, + IGB_MAC_STATE_SRC_ADDR); + + if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) + igb_del_mac_steering_filter(adapter, input->filter.dst_addr, + input->action, 0); + return 0; } @@ -2865,7 +2897,7 @@ static int igb_update_ethtool_nfc_entry(struct igb_adapter *adapter, /* add filter 
to the list */ if (parent) - hlist_add_behind(&parent->nfc_node, &input->nfc_node); + hlist_add_behind(&input->nfc_node, &parent->nfc_node); else hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list); @@ -2905,10 +2937,6 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) return -EINVAL; - if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK && - fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) - return -EINVAL; - input = kzalloc(sizeof(*input), GFP_KERNEL); if (!input) return -ENOMEM; @@ -2918,6 +2946,20 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE; } + /* Only support matching addresses by the full mask */ + if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) { + input->filter.match_flags |= IGB_FILTER_FLAG_SRC_MAC_ADDR; + ether_addr_copy(input->filter.src_addr, + fsp->h_u.ether_spec.h_source); + } + + /* Only support matching addresses by the full mask */ + if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) { + input->filter.match_flags |= IGB_FILTER_FLAG_DST_MAC_ADDR; + ether_addr_copy(input->filter.dst_addr, + fsp->h_u.ether_spec.h_dest); + } + if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) { if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) { err = -EINVAL; diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c index bebe43b3a836..3b83747b2700 100644 --- a/drivers/net/ethernet/intel/igb/igb_hwmon.c +++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c @@ -1,26 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #include "igb.h" #include "e1000_82575.h" diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index cce7ada89255..78574c06635b 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1,26 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
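As an aside on the ethtool path above: igb_add_ethtool_nfc_entry() now also accepts ETHER_FLOW rules whose MAC masks are all-ones, steering matches to the RX queue carried in ring_cookie, while igb_get_ethtool_nfc_entry() reports the same all-ones mask back to userspace. A hedged sketch of the rule shape involved; fill_dst_mac_rule() is a hypothetical helper, not part of the patch:

#include <string.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>

/* Build the kind of ETHER_FLOW rule the new igb code accepts:
 * destination MAC matched under a full mask, steered to RX queue 2.
 */
static void fill_dst_mac_rule(struct ethtool_rx_flow_spec *fsp)
{
	static const unsigned char dst[ETH_ALEN] = {
		0x01, 0xaa, 0xbb, 0xcc, 0xdd, 0xee };

	memset(fsp, 0, sizeof(*fsp));
	fsp->flow_type = ETHER_FLOW;
	memcpy(fsp->h_u.ether_spec.h_dest, dst, ETH_ALEN);
	/* only the full mask is supported; partial masks are rejected */
	memset(fsp->m_u.ether_spec.h_dest, 0xff, ETH_ALEN);
	fsp->ring_cookie = 2;	/* action: steer to RX queue 2 */
}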
See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - */ +/* Copyright(c) 2007 - 2018 Intel Corporation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -36,6 +15,7 @@ #include <net/checksum.h> #include <net/ip6_checksum.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/net_tstamp.h> #include <linux/mii.h> #include <linux/ethtool.h> @@ -2513,6 +2493,250 @@ static int igb_offload_cbs(struct igb_adapter *adapter, return 0; } +#define ETHER_TYPE_FULL_MASK ((__force __be16)~0) +#define VLAN_PRIO_FULL_MASK (0x07) + +static int igb_parse_cls_flower(struct igb_adapter *adapter, + struct tc_cls_flower_offload *f, + int traffic_class, + struct igb_nfc_filter *input) +{ + struct netlink_ext_ack *extack = f->common.extack; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN))) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported key used, only BASIC, CONTROL, ETH_ADDRS and VLAN are supported"); + return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key, *mask; + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + + if (!is_zero_ether_addr(mask->dst)) { + if (!is_broadcast_ether_addr(mask->dst)) { + NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address"); + return -EINVAL; + } + + input->filter.match_flags |= + IGB_FILTER_FLAG_DST_MAC_ADDR; + ether_addr_copy(input->filter.dst_addr, key->dst); + } + + if (!is_zero_ether_addr(mask->src)) { + if (!is_broadcast_ether_addr(mask->src)) { + NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address"); + return -EINVAL; + } + + input->filter.match_flags |= + IGB_FILTER_FLAG_SRC_MAC_ADDR; + ether_addr_copy(input->filter.src_addr, key->src); + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key, *mask; + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + + if (mask->n_proto) { + if (mask->n_proto != ETHER_TYPE_FULL_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter"); + return -EINVAL; + } + + input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE; + input->filter.etype = key->n_proto; + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key, *mask; + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + + if (mask->vlan_priority) { + if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority"); + return -EINVAL; + } + + input->filter.match_flags |= 
IGB_FILTER_FLAG_VLAN_TCI; + input->filter.vlan_tci = key->vlan_priority; + } + } + + input->action = traffic_class; + input->cookie = f->cookie; + + return 0; +} + +static int igb_configure_clsflower(struct igb_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + struct netlink_ext_ack *extack = cls_flower->common.extack; + struct igb_nfc_filter *filter, *f; + int err, tc; + + tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); + if (tc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Invalid traffic class"); + return -EINVAL; + } + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + err = igb_parse_cls_flower(adapter, cls_flower, tc, filter); + if (err < 0) + goto err_parse; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(f, &adapter->nfc_filter_list, nfc_node) { + if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) { + err = -EEXIST; + NL_SET_ERR_MSG_MOD(extack, + "This filter is already set in ethtool"); + goto err_locked; + } + } + + hlist_for_each_entry(f, &adapter->cls_flower_list, nfc_node) { + if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) { + err = -EEXIST; + NL_SET_ERR_MSG_MOD(extack, + "This filter is already set in cls_flower"); + goto err_locked; + } + } + + err = igb_add_filter(adapter, filter); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Could not add filter to the adapter"); + goto err_locked; + } + + hlist_add_head(&filter->nfc_node, &adapter->cls_flower_list); + + spin_unlock(&adapter->nfc_lock); + + return 0; + +err_locked: + spin_unlock(&adapter->nfc_lock); + +err_parse: + kfree(filter); + + return err; +} + +static int igb_delete_clsflower(struct igb_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + struct igb_nfc_filter *filter; + int err; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(filter, &adapter->cls_flower_list, nfc_node) + if (filter->cookie == cls_flower->cookie) + break; + + if (!filter) { + err = -ENOENT; + goto out; + } + + err = igb_erase_filter(adapter, filter); + if (err < 0) + goto out; + + hlist_del(&filter->nfc_node); + kfree(filter); + +out: + spin_unlock(&adapter->nfc_lock); + + return err; +} + +static int igb_setup_tc_cls_flower(struct igb_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return igb_configure_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_DESTROY: + return igb_delete_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_STATS: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct igb_adapter *adapter = cb_priv; + + if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return igb_setup_tc_cls_flower(adapter, type_data); + + default: + return -EOPNOTSUPP; + } +} + +static int igb_setup_tc_block(struct igb_adapter *adapter, + struct tc_block_offload *f) +{ + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, igb_setup_tc_block_cb, + adapter, adapter); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb, + adapter); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -2521,6 +2745,8 @@ static int 
igb_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_QDISC_CBS: return igb_offload_cbs(adapter, type_data); + case TC_SETUP_BLOCK: + return igb_setup_tc_block(adapter, type_data); default: return -EOPNOTSUPP; @@ -2822,6 +3048,9 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->mac.type >= e1000_82576) netdev->features |= NETIF_F_SCTP_CRC; + if (hw->mac.type >= e1000_i350) + netdev->features |= NETIF_F_HW_TC; + #define IGB_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ NETIF_F_GSO_GRE_CSUM | \ NETIF_F_GSO_IPXIP4 | \ @@ -6856,8 +7085,35 @@ static void igb_set_default_mac_filter(struct igb_adapter *adapter) igb_rar_set_index(adapter, 0); } -static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, - const u8 queue) +/* If the filter to be added and an already existing filter express + * the same address and address type, it should be possible to only + * override the other configurations, for example the queue to steer + * traffic. + */ +static bool igb_mac_entry_can_be_used(const struct igb_mac_addr *entry, + const u8 *addr, const u8 flags) +{ + if (!(entry->state & IGB_MAC_STATE_IN_USE)) + return true; + + if ((entry->state & IGB_MAC_STATE_SRC_ADDR) != + (flags & IGB_MAC_STATE_SRC_ADDR)) + return false; + + if (!ether_addr_equal(addr, entry->addr)) + return false; + + return true; +} + +/* Add a MAC filter for 'addr' directing matching traffic to 'queue', + * 'flags' is used to indicate what kind of match is made, match is by + * default for the destination address, if matching by source address + * is desired the flag IGB_MAC_STATE_SRC_ADDR can be used. + */ +static int igb_add_mac_filter_flags(struct igb_adapter *adapter, + const u8 *addr, const u8 queue, + const u8 flags) { struct e1000_hw *hw = &adapter->hw; int rar_entries = hw->mac.rar_entry_count - @@ -6872,12 +7128,13 @@ static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, * addresses. */ for (i = 0; i < rar_entries; i++) { - if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE) + if (!igb_mac_entry_can_be_used(&adapter->mac_table[i], + addr, flags)) continue; ether_addr_copy(adapter->mac_table[i].addr, addr); adapter->mac_table[i].queue = queue; - adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE; + adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE | flags; igb_rar_set_index(adapter, i); return i; @@ -6886,9 +7143,22 @@ static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, return -ENOSPC; } -static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, +static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, const u8 queue) { + return igb_add_mac_filter_flags(adapter, addr, queue, 0); +} + +/* Remove a MAC filter for 'addr' directing matching traffic to + * 'queue', 'flags' is used to indicate what kind of match need to be + * removed, match is by default for the destination address, if + * matching by source address is to be removed the flag + * IGB_MAC_STATE_SRC_ADDR can be used. 
+ */ +static int igb_del_mac_filter_flags(struct igb_adapter *adapter, + const u8 *addr, const u8 queue, + const u8 flags) +{ struct e1000_hw *hw = &adapter->hw; int rar_entries = hw->mac.rar_entry_count - adapter->vfs_allocated_count; @@ -6904,14 +7174,26 @@ static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, for (i = 0; i < rar_entries; i++) { if (!(adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)) continue; + if ((adapter->mac_table[i].state & flags) != flags) + continue; if (adapter->mac_table[i].queue != queue) continue; if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) continue; - adapter->mac_table[i].state &= ~IGB_MAC_STATE_IN_USE; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); - adapter->mac_table[i].queue = 0; + /* When a filter for the default address is "deleted", + * we return it to its initial configuration + */ + if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT) { + adapter->mac_table[i].state = + IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE; + adapter->mac_table[i].queue = + adapter->vfs_allocated_count; + } else { + adapter->mac_table[i].state = 0; + adapter->mac_table[i].queue = 0; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + } igb_rar_set_index(adapter, i); return 0; @@ -6920,6 +7202,34 @@ static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, return -ENOENT; } +static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, + const u8 queue) +{ + return igb_del_mac_filter_flags(adapter, addr, queue, 0); +} + +int igb_add_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags) +{ + struct e1000_hw *hw = &adapter->hw; + + /* In theory, this should be supported on 82575 as well, but + * that part wasn't easily accessible during development. 
+ */ + if (hw->mac.type != e1000_i210) + return -EOPNOTSUPP; + + return igb_add_mac_filter_flags(adapter, addr, queue, + IGB_MAC_STATE_QUEUE_STEERING | flags); +} + +int igb_del_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags) +{ + return igb_del_mac_filter_flags(adapter, addr, queue, + IGB_MAC_STATE_QUEUE_STEERING | flags); +} + static int igb_uc_sync(struct net_device *netdev, const unsigned char *addr) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -8763,12 +9073,24 @@ static void igb_rar_set_index(struct igb_adapter *adapter, u32 index) if (is_valid_ether_addr(addr)) rar_high |= E1000_RAH_AV; - if (hw->mac.type == e1000_82575) + if (adapter->mac_table[index].state & IGB_MAC_STATE_SRC_ADDR) + rar_high |= E1000_RAH_ASEL_SRC_ADDR; + + switch (hw->mac.type) { + case e1000_82575: + case e1000_i210: + if (adapter->mac_table[index].state & + IGB_MAC_STATE_QUEUE_STEERING) + rar_high |= E1000_RAH_QSEL_ENABLE; + rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue; - else + break; + default: rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue; + break; + } } wr32(E1000_RAL(index), rar_low); @@ -9206,6 +9528,9 @@ static void igb_nfc_filter_exit(struct igb_adapter *adapter) hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) igb_erase_filter(adapter, rule); + hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node) + igb_erase_filter(adapter, rule); + spin_unlock(&adapter->nfc_lock); } diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 7454b9895a65..9f4d700e09df 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1,21 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ -/* PTP Hardware Clock (PHC) driver for the Intel 82576 and 82580 - * - * Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, see <http://www.gnu.org/licenses/>. - */ +/* Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com> */ + #include <linux/module.h> #include <linux/device.h> #include <linux/pci.h> diff --git a/drivers/net/ethernet/intel/igbvf/Makefile b/drivers/net/ethernet/intel/igbvf/Makefile index efe29dae384a..afd3e36eae75 100644 --- a/drivers/net/ethernet/intel/igbvf/Makefile +++ b/drivers/net/ethernet/intel/igbvf/Makefile @@ -1,31 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel(R) 82576 Virtual Function Linux driver -# Copyright(c) 2009 - 2012 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. 
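Returning to the steering code just above: on i210, igb_rar_set_index() now sets E1000_RAH_QSEL_ENABLE for queue-steering entries, encodes the queue number in the pool field, and adds E1000_RAH_ASEL_SRC_ADDR when the entry matches on the source address. A standalone sketch of that register composition, assuming E1000_RAH_POOL_1 is the lowest bit of the pool field (0x00040000, consistent with the E1000_RAH_POOL_MASK value above); the real function also carries the top two MAC-address bytes in RAH, omitted here:

#include <stdio.h>
#include <stdint.h>

/* Bit values from e1000_defines.h; POOL_1 is assumed, see note above */
#define E1000_RAH_AV		0x80000000u
#define E1000_RAH_ASEL_SRC_ADDR	0x00010000u
#define E1000_RAH_QSEL_ENABLE	0x10000000u
#define E1000_RAH_POOL_1	0x00040000u

static uint32_t rah_for_i210_steering(uint8_t queue, int match_src)
{
	uint32_t rah = E1000_RAH_AV;		/* entry valid */

	if (match_src)
		rah |= E1000_RAH_ASEL_SRC_ADDR;	/* compare source MAC */

	rah |= E1000_RAH_QSEL_ENABLE;		/* honour queue select */
	rah |= E1000_RAH_POOL_1 * queue;	/* queue in the pool field */
	return rah;
}

int main(void)
{
	printf("RAH = 0x%08x\n", rah_for_i210_steering(2, 1));
	return 0;
}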
-# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Contact Information: -# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> -# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 -# -################################################################################ - +# Copyright(c) 2009 - 2018 Intel Corporation. # # Makefile for the Intel(R) 82576 VF ethernet driver # diff --git a/drivers/net/ethernet/intel/igbvf/defines.h b/drivers/net/ethernet/intel/igbvf/defines.h index 04bcfec0641b..4437f832412d 100644 --- a/drivers/net/ethernet/intel/igbvf/defines.h +++ b/drivers/net/ethernet/intel/igbvf/defines.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) 82576 Virtual Function Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000_DEFINES_H_ #define _E1000_DEFINES_H_ diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c index ca39e3cccaeb..3ae358b35227 100644 --- a/drivers/net/ethernet/intel/igbvf/ethtool.c +++ b/drivers/net/ethernet/intel/igbvf/ethtool.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) 82576 Virtual Function Linux driver - Copyright(c) 2009 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. 
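One more illustration, for the cls_flower parsing added to igb_main.c above: igb_parse_cls_flower() lets a field join the match only when its mask is non-zero, and then accepts only the all-ones mask; anything partial fails with -EINVAL. A minimal standalone mirror of that policy; check_mac_mask() is a hypothetical stand-in, not the driver function:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

static bool mask_is_zero(const unsigned char *m)
{
	static const unsigned char zero[ETH_ALEN];

	return !memcmp(m, zero, ETH_ALEN);
}

static bool mask_is_full(const unsigned char *m)
{
	static const unsigned char full[ETH_ALEN] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

	return !memcmp(m, full, ETH_ALEN);
}

/* 0: field not part of the match; 1: match on the full address;
 * -22 (-EINVAL): partial masks are unsupported
 */
static int check_mac_mask(const unsigned char *mask)
{
	if (mask_is_zero(mask))
		return 0;
	if (!mask_is_full(mask))
		return -22;
	return 1;
}

int main(void)
{
	unsigned char partial[ETH_ALEN] = { 0xff, 0xff, 0xff, 0, 0, 0 };

	printf("partial mask -> %d\n", check_mac_mask(partial));
	return 0;
}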
- - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 2009 - 2018 Intel Corporation. */ /* ethtool support for igbvf */ diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h index f5bf248e22eb..eee26a3be90b 100644 --- a/drivers/net/ethernet/intel/igbvf/igbvf.h +++ b/drivers/net/ethernet/intel/igbvf/igbvf.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) 82576 Virtual Function Linux driver - Copyright(c) 2009 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 2009 - 2018 Intel Corporation. */ /* Linux PRO/1000 Ethernet Driver main header file */ diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c index 9195884096f8..163e5838f7c2 100644 --- a/drivers/net/ethernet/intel/igbvf/mbx.c +++ b/drivers/net/ethernet/intel/igbvf/mbx.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) 82576 Virtual Function Linux driver - Copyright(c) 2009 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 2009 - 2018 Intel Corporation. 
*/ #include "mbx.h" diff --git a/drivers/net/ethernet/intel/igbvf/mbx.h b/drivers/net/ethernet/intel/igbvf/mbx.h index 479b062fe9ee..e5b31818d565 100644 --- a/drivers/net/ethernet/intel/igbvf/mbx.h +++ b/drivers/net/ethernet/intel/igbvf/mbx.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) 82576 Virtual Function Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _E1000_MBX_H_ #define _E1000_MBX_H_ diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index e2b7502f1953..f818f060e5a7 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) 82576 Virtual Function Linux driver - Copyright(c) 2009 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 2009 - 2018 Intel Corporation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/ethernet/intel/igbvf/regs.h b/drivers/net/ethernet/intel/igbvf/regs.h index 614e52409f11..625a309a3355 100644 --- a/drivers/net/ethernet/intel/igbvf/regs.h +++ b/drivers/net/ethernet/intel/igbvf/regs.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) 82576 Virtual Function Linux driver - Copyright(c) 2009 - 2012 Intel Corporation. 
- - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 2009 - 2018 Intel Corporation. */ #ifndef _E1000_REGS_H_ #define _E1000_REGS_H_ diff --git a/drivers/net/ethernet/intel/igbvf/vf.c b/drivers/net/ethernet/intel/igbvf/vf.c index bfe8d8297b2e..b8ba3f94c363 100644 --- a/drivers/net/ethernet/intel/igbvf/vf.c +++ b/drivers/net/ethernet/intel/igbvf/vf.c @@ -1,29 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel(R) 82576 Virtual Function Linux driver - Copyright(c) 2009 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 2009 - 2018 Intel Corporation. */ #include "vf.h" diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h index 193b50026246..c71b0d7dbcee 100644 --- a/drivers/net/ethernet/intel/igbvf/vf.h +++ b/drivers/net/ethernet/intel/igbvf/vf.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel(R) 82576 Virtual Function Linux driver - Copyright(c) 2009 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. 
- - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 2009 - 2018 Intel Corporation. */ #ifndef _E1000_VF_H_ #define _E1000_VF_H_ diff --git a/drivers/net/ethernet/intel/ixgb/Makefile b/drivers/net/ethernet/intel/ixgb/Makefile index 1b42dd554dd2..2433e9300a33 100644 --- a/drivers/net/ethernet/intel/ixgb/Makefile +++ b/drivers/net/ethernet/intel/ixgb/Makefile @@ -1,33 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel PRO/10GbE Linux driver # Copyright(c) 1999 - 2008 Intel Corporation. # -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Contact Information: -# Linux NICS <linux.nics@intel.com> -# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> -# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 -# -################################################################################ - -# # Makefile for the Intel(R) PRO/10GbE ethernet driver # diff --git a/drivers/net/ethernet/intel/ixgb/ixgb.h b/drivers/net/ethernet/intel/ixgb/ixgb.h index 92022841755f..e85271b68410 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb.h +++ b/drivers/net/ethernet/intel/ixgb/ixgb.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2008 Intel Corporation. 
*/ #ifndef _IXGB_H_ #define _IXGB_H_ diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ee.c b/drivers/net/ethernet/intel/ixgb/ixgb_ee.c index eca216b9b859..129286fc1634 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_ee.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_ee.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2008 Intel Corporation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h b/drivers/net/ethernet/intel/ixgb/ixgb_ee.h index 475297a810fe..3ee0a09e5d0a 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h +++ b/drivers/net/ethernet/intel/ixgb/ixgb_ee.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2008 Intel Corporation. 
*/ #ifndef _IXGB_EE_H_ #define _IXGB_EE_H_ diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c index d10a0d242dda..43744bf0fc1c 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2008 Intel Corporation. */ /* ethtool support for ixgb */ diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c index bf9a220f71fb..cbaa933ef30d 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2008 Intel Corporation. 
*/ /* ixgb_hw.c * Shared functions for accessing and configuring the adapter diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h index 19f36d87ef61..6064583095da 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h +++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2008 Intel Corporation. */ #ifndef _IXGB_HW_H_ #define _IXGB_HW_H_ diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h b/drivers/net/ethernet/intel/ixgb/ixgb_ids.h index 24e849902d60..9695b8215f01 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h +++ b/drivers/net/ethernet/intel/ixgb/ixgb_ids.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2008 Intel Corporation. 
*/ #ifndef _IXGB_IDS_H_ #define _IXGB_IDS_H_ diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 2353c383f0a7..62f2173bc20e 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2008 Intel Corporation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h b/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h index b1710379192e..7bd54efa698d 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h +++ b/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2008 Intel Corporation. 
*/ /* glue for the OS independent part of ixgb * includes register access macros diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_param.c b/drivers/net/ethernet/intel/ixgb/ixgb_param.c index 04a60640ddda..f0cadd532c53 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_param.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_param.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel PRO/10GbE Linux driver - Copyright(c) 1999 - 2008 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2008 Intel Corporation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index 4cd96c88cb5d..5414685189ce 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -1,32 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel 10 Gigabit PCI Express Linux driver -# Copyright(c) 1999 - 2013 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Contact Information: -# Linux NICS <linux.nics@intel.com> -# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> -# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 -# -################################################################################ - +# Copyright(c) 1999 - 2018 Intel Corporation. 
# # Makefile for the Intel(R) 10GbE PCI Express ethernet driver # diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 4f08c712e58e..fc534e91c6b2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _IXGBE_H_ #define _IXGBE_H_ @@ -241,8 +215,7 @@ struct ixgbe_tx_buffer { unsigned long time_stamp; union { struct sk_buff *skb; - /* XDP uses address ptr on irq_clean */ - void *data; + struct xdp_frame *xdpf; }; unsigned int bytecount; unsigned short gso_segs; @@ -306,7 +279,6 @@ enum ixgbe_ring_state_t { struct ixgbe_fwd_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct net_device *netdev; - struct ixgbe_adapter *real_adapter; unsigned int tx_base_queue; unsigned int rx_base_queue; int pool; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index cb0fe5fedb33..eee277c1bedf 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -1,31 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include <linux/pci.h> #include <linux/delay.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 66a74f4651e8..42f63b943ea0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -1,31 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include <linux/pci.h> #include <linux/delay.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 633be93f3dbb..8d038837f72b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1,31 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #include <linux/pci.h> #include <linux/delay.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index 2b311382167a..4b531e8ae38a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _IXGBE_COMMON_H_ #define _IXGBE_COMMON_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c index aaea8282bfd2..d26cea5b43bd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c @@ -1,31 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #include "ixgbe.h" #include "ixgbe_type.h" diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h index 73b6362d4327..60cd5863bf5e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _DCB_CONFIG_H_ #define _DCB_CONFIG_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c index 085130626330..379ae747cdce 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c @@ -1,31 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #include "ixgbe.h" #include "ixgbe_type.h" diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h index 7edce607f901..fdca41abb44c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _DCB_82598_CONFIG_H_ #define _DCB_82598_CONFIG_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c index 1eed6811e914..7948849840a5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #include "ixgbe.h" #include "ixgbe_type.h" diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h index fa030f0abc18..c6f084883cab 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _DCB_82599_CONFIG_H_ #define _DCB_82599_CONFIG_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index b33f3f87e4b1..c00332d2e02a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #include "ixgbe.h" #include <linux/dcbnl.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c index ad54080488ee..55fe8114fe99 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -1,30 +1,6 @@ -/******************************************************************************* +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ #include <linux/debugfs.h> #include <linux/module.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index c0e6ab42e0e1..bdd179c29ea4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ /* ethtool support for ixgbe */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 7a09a40e4472..b5219e024f70 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include "ixgbe.h" #include <linux/if_ether.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h index cf1919901514..724f5382329f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #ifndef _IXGBE_FCOE_H #define _IXGBE_FCOE_H diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 68af127987bc..41af2b81e960 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -1,29 +1,5 @@ -/******************************************************************************* - * - * Intel 10 Gigabit PCI Express Linux driver - * Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. */ #include "ixgbe.h" #include <net/xfrm.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h index 4f099f516645..9ef7faadda69 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h @@ -1,30 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. 
*/ #ifndef _IXGBE_IPSEC_H_ #define _IXGBE_IPSEC_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index ed4cbe94c355..893a9206e718 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include "ixgbe.h" #include "ixgbe_sriov.h" diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index afadba99f7b8..6652b201df5b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #include <linux/types.h> #include <linux/module.h> @@ -1216,7 +1191,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, /* free the skb */ if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); + xdp_return_frame(tx_buffer->xdpf); else napi_consume_skb(tx_buffer->skb, napi_budget); @@ -1768,15 +1743,14 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) ixgbe_ipsec_rx(rx_ring, rx_desc, skb); - skb->protocol = eth_type_trans(skb, dev); - /* record Rx queue, or update MACVLAN statistics */ if (netif_is_ixgbe(dev)) skb_record_rx_queue(skb, rx_ring->queue_index); else macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true, - (skb->pkt_type == PACKET_BROADCAST) || - (skb->pkt_type == PACKET_MULTICAST)); + false); + + skb->protocol = eth_type_trans(skb, dev); } static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, @@ -2262,7 +2236,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, #define IXGBE_XDP_TX 2 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, - struct xdp_buff *xdp); + struct xdp_frame *xdpf); static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring, @@ -2270,6 +2244,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, { int err, result = IXGBE_XDP_PASS; struct bpf_prog *xdp_prog; + struct xdp_frame *xdpf; u32 act; rcu_read_lock(); @@ -2278,12 +2253,19 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, if (!xdp_prog) goto xdp_out; + prefetchw(xdp->data_hard_start); /* xdp_frame write */ + act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: break; case XDP_TX: - result = ixgbe_xmit_xdp_ring(adapter, xdp); + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) { + result = IXGBE_XDP_CONSUMED; + break; + } + result = ixgbe_xmit_xdp_ring(adapter, xdpf); break; case XDP_REDIRECT: err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); @@ -4211,7 +4193,8 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter) static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - u32 reg_offset, vf_shift; + u16 pool = adapter->num_rx_pools; + u32 reg_offset, vf_shift, vmolr; u32 gcr_ext, vmdctl; int i; @@ -4225,6 +4208,13 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) vmdctl |= IXGBE_VT_CTL_REPLEN; IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl); + /* accept untagged packets until a vlan tag is + * specifically set for the VMDQ queue/pool + */ + vmolr = IXGBE_VMOLR_AUPE; + while (pool--) + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr); + vf_shift = VMDQ_P(0) % 32; reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0; @@ -4892,36 +4882,6 @@ int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, return -ENOMEM; } -/** - * ixgbe_write_uc_addr_list - write unicast addresses to RAR table - * @netdev: network interface device structure - * @vfn: pool to associate with unicast addresses - * - * Writes unicast address list to the RAR table. 
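/*
 * Annotation, not part of the patch: the ixgbe_main.c hunks above move the
 * XDP_TX path from raw xdp_buff pointers to struct xdp_frame. The driver
 * converts with convert_to_xdp_frame() before queueing, stores the result in
 * the tx_buffer union (the ixgbe.h hunk earlier swapped "void *data" for
 * "struct xdp_frame *xdpf"), and frees completed frames with
 * xdp_return_frame() instead of page_frag_free(). The kernel helper keeps
 * the frame metadata in the packet's own headroom, which is also why the
 * hunk adds prefetchw(xdp->data_hard_start) before running the program.
 * Below is a simplified userspace sketch of that headroom trick; the struct
 * layouts are illustrative stand-ins, not the kernel's exact definitions.
 */
#include <stddef.h>
#include <stdio.h>

struct xdp_buff_s {          /* mimics xdp_buff: raw pointers into one page */
	void *data;              /* start of packet payload */
	void *data_end;          /* one past the last payload byte */
	void *data_hard_start;   /* start of headroom */
};

struct xdp_frame_s {         /* mimics xdp_frame: self-describing packet */
	void *data;
	unsigned int len;
	unsigned int headroom;
};

/* Carve the frame struct out of the buffer's own headroom: no allocation,
 * no extra free; returning the frame later just recycles the page. */
static struct xdp_frame_s *convert_to_frame(struct xdp_buff_s *xdp)
{
	size_t headroom = (char *)xdp->data - (char *)xdp->data_hard_start;
	struct xdp_frame_s *f;

	if (headroom < sizeof(*f))   /* not enough room: caller drops the pkt */
		return NULL;             /* mirrors the unlikely(!xdpf) branch */
	f = xdp->data_hard_start;
	f->data = xdp->data;
	f->len = (unsigned int)((char *)xdp->data_end - (char *)xdp->data);
	f->headroom = (unsigned int)(headroom - sizeof(*f));
	return f;
}

int main(void)
{
	char page[256];
	struct xdp_buff_s xdp = {
		.data_hard_start = page,
		.data = page + 128,
		.data_end = page + 192,
	};
	struct xdp_frame_s *f = convert_to_frame(&xdp);

	printf("len=%u headroom=%u\n", f ? f->len : 0, f ? f->headroom : 0);
	return 0;
}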
- * Returns: -ENOMEM on failure/insufficient address space - * 0 on no addresses written - * X on writing X addresses to the RAR table - **/ -static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - int count = 0; - - /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn)) - return -ENOMEM; - - if (!netdev_uc_empty(netdev)) { - struct netdev_hw_addr *ha; - netdev_for_each_uc_addr(ha, netdev) { - ixgbe_del_mac_filter(adapter, ha->addr, vfn); - ixgbe_add_mac_filter(adapter, ha->addr, vfn); - count++; - } - } - return count; -} - static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr) { struct ixgbe_adapter *adapter = netdev_priv(netdev); @@ -5301,29 +5261,6 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) spin_unlock(&adapter->fdir_perfect_lock); } -static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool, - struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 vmolr; - - /* No unicast promiscuous support for VMDQ devices. */ - vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool)); - vmolr |= (IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE); - - /* clear the affected bit */ - vmolr &= ~IXGBE_VMOLR_MPE; - - if (dev->flags & IFF_ALLMULTI) { - vmolr |= IXGBE_VMOLR_MPE; - } else { - vmolr |= IXGBE_VMOLR_ROMPE; - hw->mac.ops.update_mc_addr_list(hw, dev); - } - ixgbe_write_uc_addr_list(adapter->netdev, pool); - IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr); -} - /** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from @@ -5376,21 +5313,17 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) rx_ring->next_to_use = 0; } -static int ixgbe_fwd_ring_up(struct net_device *vdev, +static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter, struct ixgbe_fwd_adapter *accel) { - struct ixgbe_adapter *adapter = accel->real_adapter; + struct net_device *vdev = accel->netdev; int i, baseq, err; - if (!test_bit(accel->pool, adapter->fwd_bitmask)) - return 0; - baseq = accel->pool * adapter->num_rx_queues_per_pool; netdev_dbg(vdev, "pool %i:%i queues %i:%i\n", accel->pool, adapter->num_rx_pools, baseq, baseq + adapter->num_rx_queues_per_pool); - accel->netdev = vdev; accel->rx_base_queue = baseq; accel->tx_base_queue = baseq; @@ -5407,26 +5340,36 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev, */ err = ixgbe_add_mac_filter(adapter, vdev->dev_addr, VMDQ_P(accel->pool)); - if (err >= 0) { - ixgbe_macvlan_set_rx_mode(vdev, accel->pool, adapter); + if (err >= 0) return 0; - } + + /* if we cannot add the MAC rule then disable the offload */ + macvlan_release_l2fw_offload(vdev); for (i = 0; i < adapter->num_rx_queues_per_pool; i++) adapter->rx_ring[baseq + i]->netdev = NULL; + netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n"); + + clear_bit(accel->pool, adapter->fwd_bitmask); + kfree(accel); + return err; } -static int ixgbe_upper_dev_walk(struct net_device *upper, void *data) +static int ixgbe_macvlan_up(struct net_device *vdev, void *data) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *dfwd = netdev_priv(upper); - struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; + struct ixgbe_adapter *adapter = data; + struct ixgbe_fwd_adapter *accel; - if (dfwd->fwd_priv) - ixgbe_fwd_ring_up(upper, vadapter); - } + if (!netif_is_macvlan(vdev)) + return 0; + + accel = macvlan_accel_priv(vdev); + if (!accel) + 
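/*
 * Annotation, not part of the patch: ixgbe_macvlan_up() above (and the later
 * ixgbe_reassign_macvlan_pool() and get_macvlan_queue()) all share one shape:
 * a callback handed to netdev_walk_all_upper_dev_rcu() that skips uppers that
 * are not offloaded macvlans, fetching per-device offload state with
 * macvlan_accel_priv() rather than reaching into fwd_priv directly. A toy
 * sketch of that walker/callback contract follows; all names are invented
 * for the example.
 */
#include <stdio.h>

struct dev { const char *name; void *accel; };  /* accel: offload state */

/* Callback returns nonzero to stop the walk early, 0 to continue,
 * matching the netdev walker's contract. */
typedef int (*walk_cb)(struct dev *d, void *data);

static int walk_uppers(struct dev *uppers, int n, walk_cb cb, void *data)
{
	for (int i = 0; i < n; i++) {
		int ret = cb(&uppers[i], data);

		if (ret)
			return ret;
	}
	return 0;
}

static int bring_up(struct dev *d, void *data)
{
	int *count = data;

	if (!d->accel)          /* not an offloaded macvlan: skip it */
		return 0;
	printf("ring up for %s\n", d->name);
	(*count)++;
	return 0;               /* keep walking */
}

int main(void)
{
	int token = 1, count = 0;
	struct dev uppers[] = {
		{ "macvlan0", &token }, { "vlan10", NULL }, { "macvlan1", &token },
	};

	walk_uppers(uppers, 3, bring_up, &count);
	printf("offloaded: %d\n", count);   /* prints 2 */
	return 0;
}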
return 0; + + ixgbe_fwd_ring_up(adapter, accel); return 0; } @@ -5434,7 +5377,7 @@ static int ixgbe_upper_dev_walk(struct net_device *upper, void *data) static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter) { netdev_walk_all_upper_dev_rcu(adapter->netdev, - ixgbe_upper_dev_walk, NULL); + ixgbe_macvlan_up, adapter); } static void ixgbe_configure(struct ixgbe_adapter *adapter) @@ -5797,7 +5740,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring) /* Free all the Tx ring sk_buffs */ if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); + xdp_return_frame(tx_buffer->xdpf); else dev_kfree_skb_any(tx_buffer->skb); @@ -6370,7 +6313,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, struct device *dev = rx_ring->dev; int orig_node = dev_to_node(dev); int ring_node = -1; - int size; + int size, err; size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; @@ -6407,6 +6350,13 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, rx_ring->queue_index) < 0) goto err; + err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL); + if (err) { + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + goto err; + } + rx_ring->xdp_prog = adapter->xdp_prog; return 0; @@ -8336,7 +8286,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, } static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, - struct xdp_buff *xdp) + struct xdp_frame *xdpf) { struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; struct ixgbe_tx_buffer *tx_buffer; @@ -8345,12 +8295,12 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, dma_addr_t dma; u16 i; - len = xdp->data_end - xdp->data; + len = xdpf->len; if (unlikely(!ixgbe_desc_unused(ring))) return IXGBE_XDP_CONSUMED; - dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE); + dma = dma_map_single(ring->dev, xdpf->data, len, DMA_TO_DEVICE); if (dma_mapping_error(ring->dev, dma)) return IXGBE_XDP_CONSUMED; @@ -8365,7 +8315,8 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, dma_unmap_len_set(tx_buffer, len, len); dma_unmap_addr_set(tx_buffer, dma, dma); - tx_buffer->data = xdp->data; + tx_buffer->xdpf = xdpf; + tx_desc->read.buffer_addr = cpu_to_le64(dma); /* put descriptor type bits */ @@ -8827,6 +8778,49 @@ static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter) } #endif /* CONFIG_IXGBE_DCB */ +static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data) +{ + struct ixgbe_adapter *adapter = data; + struct ixgbe_fwd_adapter *accel; + int pool; + + /* we only care about macvlans... */ + if (!netif_is_macvlan(vdev)) + return 0; + + /* that have hardware offload enabled... 
*/ + accel = macvlan_accel_priv(vdev); + if (!accel) + return 0; + + /* If we can relocate to a different bit do so */ + pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools); + if (pool < adapter->num_rx_pools) { + set_bit(pool, adapter->fwd_bitmask); + accel->pool = pool; + return 0; + } + + /* if we cannot find a free pool then disable the offload */ + netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n"); + macvlan_release_l2fw_offload(vdev); + kfree(accel); + + return 0; +} + +static void ixgbe_defrag_macvlan_pools(struct net_device *dev) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + + /* flush any stale bits out of the fwd bitmask */ + bitmap_clear(adapter->fwd_bitmask, 1, 63); + + /* walk through upper devices reassigning pools */ + netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool, + adapter); +} + /** * ixgbe_setup_tc - configure net_device for multiple traffic classes * @@ -8894,6 +8888,8 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) #endif /* CONFIG_IXGBE_DCB */ ixgbe_init_interrupt_scheme(adapter); + ixgbe_defrag_macvlan_pools(dev); + if (netif_running(dev)) return ixgbe_open(dev); @@ -8998,13 +8994,12 @@ struct upper_walk_data { static int get_macvlan_queue(struct net_device *upper, void *_data) { if (netif_is_macvlan(upper)) { - struct macvlan_dev *dfwd = netdev_priv(upper); - struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; + struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper); struct upper_walk_data *data = _data; struct ixgbe_adapter *adapter = data->adapter; int ifindex = data->ifindex; - if (vadapter && vadapter->netdev->ifindex == ifindex) { + if (vadapter && upper->ifindex == ifindex) { data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx; data->action = data->queue; return 1; @@ -9444,6 +9439,22 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev, return features; } +static void ixgbe_reset_l2fw_offload(struct ixgbe_adapter *adapter) +{ + int rss = min_t(int, ixgbe_max_rss_indices(adapter), + num_online_cpus()); + + /* go back to full RSS if we're not running SR-IOV */ + if (!adapter->ring_feature[RING_F_VMDQ].offset) + adapter->flags &= ~(IXGBE_FLAG_VMDQ_ENABLED | + IXGBE_FLAG_SRIOV_ENABLED); + + adapter->ring_feature[RING_F_RSS].limit = rss; + adapter->ring_feature[RING_F_VMDQ].limit = 1; + + ixgbe_setup_tc(adapter->netdev, adapter->hw_tcs); +} + static int ixgbe_set_features(struct net_device *netdev, netdev_features_t features) { @@ -9524,7 +9535,9 @@ static int ixgbe_set_features(struct net_device *netdev, } } - if (need_reset) + if ((changed & NETIF_F_HW_L2FW_DOFFLOAD) && adapter->num_rx_pools > 1) + ixgbe_reset_l2fw_offload(adapter); + else if (need_reset) ixgbe_do_reset(netdev); else if (changed & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER)) @@ -9787,71 +9800,98 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) { - struct ixgbe_fwd_adapter *fwd_adapter = NULL; struct ixgbe_adapter *adapter = netdev_priv(pdev); - int used_pools = adapter->num_vfs + adapter->num_rx_pools; + struct ixgbe_fwd_adapter *accel; int tcs = adapter->hw_tcs ? : 1; - unsigned int limit; int pool, err; - /* Hardware has a limited number of available pools. Each VF, and the - * PF require a pool. Check to ensure we don't attempt to use more - * then the available number of pools. 
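The defragmentation pass above is plain bitmap compaction: bitmap_clear() flushes every pool bit except the PF's bit 0, and each offloaded macvlan then re-claims the lowest free bit. A minimal sketch of that claim step follows; the demo_* names are illustrative stand-ins, not the driver's real structures.

    #include <linux/bitmap.h>
    #include <linux/errno.h>

    /* Illustrative stand-in for the per-macvlan accelerator state. */
    struct demo_accel {
    	unsigned int pool;
    };

    static int demo_claim_lowest_pool(unsigned long *fwd_bitmask,
    				      unsigned int num_pools,
    				      struct demo_accel *accel)
    {
    	unsigned int pool;

    	/* The lowest clear bit is the lowest free pool. */
    	pool = find_first_zero_bit(fwd_bitmask, num_pools);
    	if (pool >= num_pools)
    		return -ENOSPC;	/* no pool left; tear the offload down */

    	set_bit(pool, fwd_bitmask);
    	accel->pool = pool;
    	return 0;
    }

Packing the surviving macvlans into the lowest indices keeps the pool-to-queue mapping dense after a traffic-class change shrinks the usable pool space.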
+ /* The hardware supported by ixgbe only filters on the destination MAC + * address. In order to avoid issues we only support offloading modes + * where the hardware can actually provide the functionality. */ - if (used_pools >= IXGBE_MAX_VF_FUNCTIONS) - return ERR_PTR(-EINVAL); + if (!macvlan_supports_dest_filter(vdev)) + return ERR_PTR(-EMEDIUMTYPE); - if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && - adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) || - (adapter->num_rx_pools > IXGBE_MAX_MACVLANS)) - return ERR_PTR(-EBUSY); + pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools); + if (pool == adapter->num_rx_pools) { + u16 used_pools = adapter->num_vfs + adapter->num_rx_pools; + u16 reserved_pools; + + if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && + adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) || + adapter->num_rx_pools > IXGBE_MAX_MACVLANS) + return ERR_PTR(-EBUSY); + + /* Hardware has a limited number of available pools. Each VF, + * and the PF require a pool. Check to ensure we don't + * attempt to use more than the available number of pools. + */ + if (used_pools >= IXGBE_MAX_VF_FUNCTIONS) + return ERR_PTR(-EBUSY); - fwd_adapter = kzalloc(sizeof(*fwd_adapter), GFP_KERNEL); - if (!fwd_adapter) - return ERR_PTR(-ENOMEM); + /* Enable VMDq flag so device will be set in VM mode */ + adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | + IXGBE_FLAG_SRIOV_ENABLED; - pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools); - set_bit(pool, adapter->fwd_bitmask); - limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools + 1); + /* Try to reserve as many queues per pool as possible; + * we start with the configurations that support 4 queues + * per pool, followed by 2, and then by just 1 per pool. + */ + if (used_pools < 32 && adapter->num_rx_pools < 16) + reserved_pools = min_t(u16, + 32 - used_pools, + 16 - adapter->num_rx_pools); + else if (adapter->num_rx_pools < 32) + reserved_pools = min_t(u16, + 64 - used_pools, + 32 - adapter->num_rx_pools); + else + reserved_pools = 64 - used_pools; - /* Enable VMDq flag so device will be set in VM mode */ - adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | IXGBE_FLAG_SRIOV_ENABLED; - adapter->ring_feature[RING_F_VMDQ].limit = limit + 1; - fwd_adapter->pool = pool; - fwd_adapter->real_adapter = adapter; + if (!reserved_pools) + return ERR_PTR(-EBUSY); - /* Force reinit of ring allocation with VMDQ enabled */ - err = ixgbe_setup_tc(pdev, adapter->hw_tcs); + adapter->ring_feature[RING_F_VMDQ].limit += reserved_pools; - if (!err && netif_running(pdev)) - err = ixgbe_fwd_ring_up(vdev, fwd_adapter); + /* Force reinit of ring allocation with VMDQ enabled */ + err = ixgbe_setup_tc(pdev, adapter->hw_tcs); + if (err) + return ERR_PTR(err); - if (!err) - return fwd_adapter; + if (pool >= adapter->num_rx_pools) + return ERR_PTR(-ENOMEM); + } - /* unwind counter and free adapter struct */ - netdev_info(pdev, - "%s: dfwd hardware acceleration failed\n", vdev->name); - clear_bit(pool, adapter->fwd_bitmask); - kfree(fwd_adapter); - return ERR_PTR(err); + accel = kzalloc(sizeof(*accel), GFP_KERNEL); + if (!accel) + return ERR_PTR(-ENOMEM); + + set_bit(pool, adapter->fwd_bitmask); + accel->pool = pool; + accel->netdev = vdev; + + if (!netif_running(pdev)) + return accel; + + err = ixgbe_fwd_ring_up(adapter, accel); + if (err) + return ERR_PTR(err); + + return accel; } static void ixgbe_fwd_del(struct net_device *pdev, void *priv) { struct ixgbe_fwd_adapter *accel = priv; - struct ixgbe_adapter *adapter = accel->real_adapter; +
struct ixgbe_adapter *adapter = netdev_priv(pdev); unsigned int rxbase = accel->rx_base_queue; - unsigned int limit, i; + unsigned int i; /* delete unicast filter associated with offloaded interface */ ixgbe_del_mac_filter(adapter, accel->netdev->dev_addr, VMDQ_P(accel->pool)); - /* disable ability to receive packets for this pool */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(accel->pool), 0); - /* Allow remaining Rx packets to get flushed out of the * Rx FIFO before we drop the netdev for the ring. */ @@ -9870,25 +9910,6 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv) } clear_bit(accel->pool, adapter->fwd_bitmask); - limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools); - adapter->ring_feature[RING_F_VMDQ].limit = limit + 1; - - /* go back to full RSS if we're done with our VMQs */ - if (adapter->ring_feature[RING_F_VMDQ].limit == 1) { - int rss = min_t(int, ixgbe_max_rss_indices(adapter), - num_online_cpus()); - - adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED; - adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED; - adapter->ring_feature[RING_F_RSS].limit = rss; - } - - ixgbe_setup_tc(pdev, adapter->hw_tcs); - netdev_dbg(pdev, "pool %i:%i queues %i:%i\n", - accel->pool, adapter->num_rx_pools, - accel->rx_base_queue, - accel->rx_base_queue + - adapter->num_rx_queues_per_pool); kfree(accel); } @@ -9996,7 +10017,7 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ring *ring; @@ -10012,7 +10033,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) if (unlikely(!ring)) return -ENXIO; - err = ixgbe_xmit_xdp_ring(adapter, xdp); + err = ixgbe_xmit_xdp_ring(adapter, xdpf); if (err != IXGBE_XDP_TX) return -ENOSPC; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c index a0cb84381cd0..5679293e53f7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. 
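Both ixgbe_xdp_xmit() and ixgbe_xmit_xdp_ring() now take a struct xdp_frame rather than a raw xdp_buff, so the caller must first flatten the buffer, which merely aliases RX memory, into a self-contained frame; the TX cleanup path then frees it with xdp_return_frame(tx_buffer->xdpf), as an earlier hunk shows. A sketch of that handoff, assuming the convert_to_xdp_frame() helper introduced alongside these changes (demo_xdp_redirect_one() is an illustrative name):

    #include <linux/netdevice.h>
    #include <net/xdp.h>

    static int demo_xdp_redirect_one(struct net_device *tx_dev,
    				     struct xdp_buff *xdp)
    {
    	/* Store the frame metadata in the packet headroom so the
    	 * buffer outlives the RX descriptor it came from.
    	 */
    	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);

    	if (unlikely(!xdpf))
    		return -EOVERFLOW; /* not enough headroom for the metadata */

    	/* At this point in the series the hook takes one frame at a time. */
    	return tx_dev->netdev_ops->ndo_xdp_xmit(tx_dev, xdpf);
    }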
*/ #include <linux/pci.h> #include <linux/delay.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index c4628b663590..e085b6520dac 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _IXGBE_MBX_H_ #define _IXGBE_MBX_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h index 72446644f9fa..fcae520647ce 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel 10 Gigabit PCI Express Linux drive - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #ifndef _IXGBE_MODEL_H_ #define _IXGBE_MODEL_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 91bde90f9265..919a7af84b42 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include <linux/pci.h> #include <linux/delay.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index d6a7e77348c5..64e44e01c973 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #ifndef _IXGBE_PHY_H_ #define _IXGBE_PHY_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index f6cc9166082a..b3e0d8bb5cbd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1,30 +1,6 @@ -/******************************************************************************* +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ #include "ixgbe.h" #include <linux/ptp_classify.h> #include <linux/clocksource.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 008aa073a679..2649c06d877b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #include <linux/types.h> #include <linux/module.h> @@ -266,7 +241,7 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) #endif /* Disable VMDq flag so device will be set in VM mode */ - if (adapter->ring_feature[RING_F_VMDQ].limit == 1) { + if (bitmap_weight(adapter->fwd_bitmask, adapter->num_rx_pools) == 1) { adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED; adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED; rss = min_t(int, ixgbe_max_rss_indices(adapter), @@ -312,7 +287,8 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) * other values out of range. */ num_tc = adapter->hw_tcs; - num_rx_pools = adapter->num_rx_pools; + num_rx_pools = bitmap_weight(adapter->fwd_bitmask, + adapter->num_rx_pools); limit = (num_tc > 4) ? IXGBE_MAX_VFS_8TC : (num_tc > 1) ? IXGBE_MAX_VFS_4TC : IXGBE_MAX_VFS_1TC; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index e30d1f07e891..3ec21923c89c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _IXGBE_SRIOV_H_ #define _IXGBE_SRIOV_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c index 24766e125592..204844288c16 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
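The two ixgbe_sriov.c hunks above stop using the RING_F_VMDQ limit as a proxy for pool usage: with pools now claimed and released as individual bits in fwd_bitmask, the number of pools in use is simply the bitmap's population count. A one-line illustration of the new test (names hypothetical):

    #include <linux/bitmap.h>

    /* The device is back in plain, non-VMDq mode exactly when only the
     * PF's own pool, bit 0, is still set.
     */
    static bool demo_only_pf_pool_left(const unsigned long *fwd_bitmask,
    				       unsigned int num_pools)
    {
    	return bitmap_weight(fwd_bitmask, num_pools) == 1;
    }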
- - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include "ixgbe.h" #include "ixgbe_common.h" diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 2daa81e6e9b2..e8ed37749ab1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1,31 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _IXGBE_TYPE_H_ #define _IXGBE_TYPE_H_ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index b8c5fd2a2115..de563cfd294d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -1,30 +1,5 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2016 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS <linux.nics@intel.com> - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include <linux/pci.h> #include <linux/delay.h> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h index 182d640e9f7a..e246c0d2a427 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h @@ -1,27 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - * - * Intel 10 Gigabit PCI Express Linux driver - * Copyright(c) 1999 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - *****************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include "ixgbe_type.h" diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 3123267dfba9..959a37599a6b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1,26 +1,6 @@ -/******************************************************************************* - * - * Intel 10 Gigabit PCI Express Linux driver - * Copyright(c) 1999 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * Linux NICS <linux.nics@intel.com> - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ + #include "ixgbe_x540.h" #include "ixgbe_type.h" #include "ixgbe_common.h" diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile index bb47814cfa90..aba1e6a37a6a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/Makefile +++ b/drivers/net/ethernet/intel/ixgbevf/Makefile @@ -1,31 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -################################################################################ -# -# Intel 82599 Virtual Function driver -# Copyright(c) 1999 - 2012 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. -# -# You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Contact Information: -# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> -# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 -# -################################################################################ - +# Copyright(c) 1999 - 2018 Intel Corporation. # # Makefile for the Intel(R) 82599 VF ethernet driver # diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index 71c828842b11..700d8eb2f6f8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #ifndef _IXGBEVF_DEFINES_H_ #define _IXGBEVF_DEFINES_H_ diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 8e7d6c6f5c92..e7813d76527c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -1,28 +1,5 @@ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2018 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ /* ethtool support for ixgbevf */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 447ce1d5e0e3..70c75681495f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2018 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _IXGBEVF_H_ #define _IXGBEVF_H_ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index e3d04f226d57..e91c3d1a43ce 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1,28 +1,5 @@ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2018 Intel Corporation. 
- - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ /****************************************************************************** Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c index 2819abc454c7..6bc1953263b9 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.c +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c @@ -1,28 +1,5 @@ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #include "mbx.h" #include "ixgbevf.h" diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index 5ec947fe3d09..bfd9ae150808 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. 
- - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _IXGBE_MBX_H_ #define _IXGBE_MBX_H_ diff --git a/drivers/net/ethernet/intel/ixgbevf/regs.h b/drivers/net/ethernet/intel/ixgbevf/regs.h index 278f73980501..68d16ae5b65a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/regs.h +++ b/drivers/net/ethernet/intel/ixgbevf/regs.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef _IXGBEVF_REGS_H_ #define _IXGBEVF_REGS_H_ diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 38d3a327c1bc..bf0577e819e1 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -1,28 +1,5 @@ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 1999 - 2018 Intel Corporation. 
*/ #include "vf.h" #include "ixgbevf.h" diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 194fbdaa4519..d1e9e306653b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -1,29 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/******************************************************************************* - - Intel 82599 Virtual Function driver - Copyright(c) 1999 - 2015 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see <http://www.gnu.org/licenses/>. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Copyright(c) 1999 - 2018 Intel Corporation. */ #ifndef __IXGBE_VF_H__ #define __IXGBE_VF_H__ diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e0b72bf428f7..d8ebf0a05e0c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2503,7 +2503,6 @@ static int mtk_probe(struct platform_device *pdev) { struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); struct device_node *mac_np; - const struct of_device_id *match; struct mtk_eth *eth; int err; int i; @@ -2512,8 +2511,7 @@ static int mtk_probe(struct platform_device *pdev) if (!eth) return -ENOMEM; - match = of_match_device(of_mtk_match, &pdev->dev); - eth->soc = (struct mtk_soc_data *)match->data; + eth->soc = of_device_get_match_data(&pdev->dev); eth->dev = &pdev->dev; eth->base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 5c613c6663da..efc55feddc5c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -775,8 +775,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud act = bpf_prog_run_xdp(xdp_prog, &xdp); + length = xdp.data_end - xdp.data; if (xdp.data != orig_data) { - length = xdp.data_end - xdp.data; frags[0].page_offset = xdp.data - xdp.data_hard_start; va = xdp.data; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 6b6853773848..0227786308af 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -694,7 +694,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, u16 rings_p_up = priv->num_tx_rings_p_up; if (netdev_get_num_tc(dev)) - return skb_tx_hash(dev, skb); + return fallback(dev, skb); return fallback(dev, skb) % rings_p_up; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index c032319f1cb9..ee6684779d11 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -30,6 +30,7 @@ config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m + select PAGE_POOL default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. @@ -85,3 +86,14 @@ config MLX5_EN_IPSEC Build support for IPsec cryptography-offload accelaration in the NIC. Note: Support for hardware with this capability needs to be selected for this option to become available. + +config MLX5_EN_TLS + bool "TLS cryptography-offload acceleration" + depends on MLX5_CORE_EN + depends on TLS_DEVICE + depends on MLX5_ACCEL + default n + ---help--- + Build support for TLS cryptography-offload acceleration in the NIC. + Note: Support for hardware with this capability needs to be selected + for this option to become available. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index c805769d92a9..a7135f5d5cf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -8,10 +8,10 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \ diag/fs_tracepoint.o -mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o +mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ - fpga/ipsec.o + fpga/ipsec.o fpga/tls.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en_stats.o vxlan.o \ @@ -28,4 +28,6 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o + CFLAGS_tracepoint.o := -I$(src) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c new file mode 100644 index 000000000000..77ac19f38cbe --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx5/device.h> + +#include "accel/tls.h" +#include "mlx5_core.h" +#include "fpga/tls.h" + +int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid) +{ + return mlx5_fpga_tls_add_tx_flow(mdev, flow, crypto_info, + start_offload_tcp_sn, p_swid); +} + +void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid) +{ + mlx5_fpga_tls_del_tx_flow(mdev, swid, GFP_KERNEL); +} + +bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_is_tls_device(mdev); +} + +u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_tls_device_caps(mdev); +} + +int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_tls_init(mdev); +} + +void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) +{ + mlx5_fpga_tls_cleanup(mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h new file mode 100644 index 000000000000..6f9c9f446ecc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
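accel/tls.c above is deliberately a thin dispatch layer: each mlx5_accel_tls_* entry point forwards to its mlx5_fpga_tls_* counterpart, and the header that follows stubs them all out when CONFIG_MLX5_ACCEL is disabled, so callers need no #ifdefs. A hedged sketch of how a consumer would install a TX context; the flow buffer and error policy here are illustrative, not the driver's exact code:

    #include <linux/errno.h>
    #include "accel/tls.h"

    static int demo_tls_tx_setup(struct mlx5_core_dev *mdev, void *flow,
    				 struct tls_crypto_info *crypto_info,
    				 u32 start_tcp_sn, u32 *swid)
    {
    	/* Both checks collapse to constant stubs without CONFIG_MLX5_ACCEL. */
    	if (!mlx5_accel_is_tls_device(mdev))
    		return -EOPNOTSUPP;
    	if (!(mlx5_accel_tls_device_caps(mdev) & MLX5_ACCEL_TLS_TX))
    		return -EOPNOTSUPP;

    	/* Hands the flow tuple and key material to the FPGA; *swid is the
    	 * software ID the send path later embeds in each packet.
    	 */
    	return mlx5_accel_tls_add_tx_flow(mdev, flow, crypto_info,
    					  start_tcp_sn, swid);
    }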
+ * + */ + +#ifndef __MLX5_ACCEL_TLS_H__ +#define __MLX5_ACCEL_TLS_H__ + +#include <linux/mlx5/driver.h> +#include <linux/tls.h> + +#ifdef CONFIG_MLX5_ACCEL + +enum { + MLX5_ACCEL_TLS_TX = BIT(0), + MLX5_ACCEL_TLS_RX = BIT(1), + MLX5_ACCEL_TLS_V12 = BIT(2), + MLX5_ACCEL_TLS_V13 = BIT(3), + MLX5_ACCEL_TLS_LRO = BIT(4), + MLX5_ACCEL_TLS_IPV6 = BIT(5), + MLX5_ACCEL_TLS_AES_GCM128 = BIT(30), + MLX5_ACCEL_TLS_AES_GCM256 = BIT(31), +}; + +struct mlx5_ifc_tls_flow_bits { + u8 src_port[0x10]; + u8 dst_port[0x10]; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; + u8 ipv6[0x1]; + u8 direction_sx[0x1]; + u8 reserved_at_2[0x1e]; +}; + +int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid); +void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid); +bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev); +u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev); +int mlx5_accel_tls_init(struct mlx5_core_dev *mdev); +void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev); + +#else + +static inline int +mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid) { return 0; } +static inline void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid) { } +static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; } +static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; } +static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; } +static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { } + +#endif + +#endif /* __MLX5_ACCEL_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 30cad07be2b5..9cc07da09b70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -53,6 +53,11 @@ #include "mlx5_core.h" #include "en_stats.h" +struct page_pool; + +#define MLX5E_METADATA_ETHER_TYPE (0x8CE4) +#define MLX5E_METADATA_ETHER_LEN 8 + #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) @@ -239,6 +244,7 @@ struct mlx5e_params { bool vlan_strip_disable; bool scatter_fcs_en; bool rx_dim_enabled; + bool tx_dim_enabled; u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; @@ -328,6 +334,8 @@ enum { MLX5E_SQ_STATE_ENABLED, MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, + MLX5E_SQ_STATE_AM, + MLX5E_SQ_STATE_TLS, }; struct mlx5e_sq_wqe_info { @@ -340,6 +348,7 @@ struct mlx5e_txqsq { /* dirtied @completion */ u16 cc; u32 dma_fifo_cc; + struct net_dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; @@ -392,6 +401,7 @@ struct mlx5e_xdpsq { struct { struct mlx5e_dma_info *di; bool doorbell; + bool redirect_flush; } db; /* read only */ @@ -533,6 +543,7 @@ struct mlx5e_rq { unsigned int hw_mtu; struct mlx5e_xdpsq xdpsq; DECLARE_BITMAP(flags, 8); + struct page_pool *page_pool; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -790,6 +801,9 @@ struct mlx5e_priv { #ifdef CONFIG_MLX5_EN_IPSEC struct mlx5e_ipsec *ipsec; #endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5e_tls *tls; +#endif }; struct mlx5e_profile { @@ -820,6 +834,8 @@ void mlx5e_build_ptys2ethtool_map(void); u16 mlx5e_select_queue(struct 
net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_tx_wqe *wqe, u16 pi); void mlx5e_completion_event(struct mlx5_core_cq *mcq); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); @@ -935,6 +951,18 @@ static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); } +static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe **wqe, + u16 *pi) +{ + struct mlx5_wq_cyc *wq; + + wq = &sq->wq; + *pi = sq->pc & wq->sz_m1; + *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + memset(*wqe, 0, sizeof(**wqe)); +} + static inline struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) { @@ -1107,4 +1135,5 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, u16 max_channels, u16 mtu); u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); +void mlx5e_tx_dim_work(struct work_struct *work); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h new file mode 100644 index 000000000000..68fcb40a2847 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
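mlx5e_sq_fetch_wqe() above depends on the cyclic work queue size being a power of two: sz_m1 is size - 1, so pc & sz_m1 is a cheap modulo and the producer counter pc can wrap freely. The same indexing trick in isolation (toy structure, names hypothetical):

/* Toy ring demonstrating the pc & (size - 1) indexing used by the SQ. */
struct toy_ring {
	u16 pc;        /* free-running producer counter, wraps naturally */
	u16 sz_m1;     /* ring size - 1; ring size must be a power of two */
	void *slot[8]; /* sz_m1 == 7 for this toy */
};

static void *toy_ring_slot(struct toy_ring *r)
{
	u16 pi = r->pc & r->sz_m1; /* cheap pc % ring_size */

	return r->slot[pi];
}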
+ * + */ + +#ifndef __MLX5E_EN_ACCEL_H__ +#define __MLX5E_EN_ACCEL_H__ + +#ifdef CONFIG_MLX5_ACCEL + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/tls_rxtx.h" +#include "en.h" + +static inline struct sk_buff *mlx5e_accel_handle_tx(struct sk_buff *skb, + struct mlx5e_txqsq *sq, + struct net_device *dev, + struct mlx5e_tx_wqe **wqe, + u16 *pi) +{ +#ifdef CONFIG_MLX5_EN_TLS + if (sq->state & BIT(MLX5E_SQ_STATE_TLS)) { + skb = mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi); + if (unlikely(!skb)) + return NULL; + } +#endif + +#ifdef CONFIG_MLX5_EN_IPSEC + if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) { + skb = mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb); + if (unlikely(!skb)) + return NULL; + } +#endif + + return skb; +} + +#endif /* CONFIG_MLX5_ACCEL */ + +#endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 1198fc1eba4c..93bf10e6508c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -45,9 +45,6 @@ #define MLX5E_IPSEC_SADB_RX_BITS 10 #define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L -#define MLX5E_METADATA_ETHER_TYPE (0x8CE4) -#define MLX5E_METADATA_ETHER_LEN 8 - struct mlx5e_priv; struct mlx5e_ipsec_sw_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c new file mode 100644 index 000000000000..d167845271c3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
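mlx5e_accel_handle_tx() above chains the offload stages under one contract: each stage returns the skb (possibly a different one, e.g. after prepending metadata) or NULL when it consumed or dropped the packet, and NULL short-circuits the rest of the transmit path. The contract reduced to a skeleton (hypothetical stage table, sketch only):

#include <linux/skbuff.h>

/* Each stage may return a different skb than it was given, or NULL. */
typedef struct sk_buff *(*tx_accel_stage_t)(struct sk_buff *skb);

static struct sk_buff *run_tx_accel(struct sk_buff *skb,
				    const tx_accel_stage_t *stages,
				    unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		skb = stages[i](skb);
		if (!skb) /* stage consumed or dropped the packet */
			return NULL;
	}
	return skb;
}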
+ * + */ + +#include <linux/netdevice.h> +#include <net/ipv6.h> +#include "en_accel/tls.h" +#include "accel/tls.h" + +static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + + MLX5_SET(tls_flow, flow, ipv6, 0); + memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); + memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4), + &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + MLX5_SET(tls_flow, flow, ipv6, 1); + memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6), + &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); +} +#endif + +static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + + memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport, + MLX5_FLD_SZ_BYTES(tls_flow, src_port)); + memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport, + MLX5_FLD_SZ_BYTES(tls_flow, dst_port)); +} + +static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps) +{ + switch (sk->sk_family) { + case AF_INET: + mlx5e_tls_set_ipv4_flow(flow, sk); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (!sk->sk_ipv6only && + ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) { + mlx5e_tls_set_ipv4_flow(flow, sk); + break; + } + if (!(caps & MLX5_ACCEL_TLS_IPV6)) + goto error_out; + + mlx5e_tls_set_ipv6_flow(flow, sk); + break; +#endif + default: + goto error_out; + } + + mlx5e_tls_set_flow_tcp_ports(flow, sk); + return 0; +error_out: + return -EINVAL; +} + +static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct mlx5_core_dev *mdev = priv->mdev; + u32 caps = mlx5_accel_tls_device_caps(mdev); + int ret = -ENOMEM; + void *flow; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return -EINVAL; + + flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL); + if (!flow) + return ret; + + ret = mlx5e_tls_set_flow(flow, sk, caps); + if (ret) + goto free_flow; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + struct mlx5e_tls_offload_context *tx_ctx = + mlx5e_get_tls_tx_context(tls_ctx); + u32 swid; + + ret = mlx5_accel_tls_add_tx_flow(mdev, flow, crypto_info, + start_offload_tcp_sn, &swid); + if (ret < 0) + goto free_flow; + + tx_ctx->swid = htonl(swid); + tx_ctx->expected_seq = start_offload_tcp_sn; + } + + return 0; +free_flow: + kfree(flow); + return ret; +} + +static void mlx5e_tls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + u32 swid = ntohl(mlx5e_get_tls_tx_context(tls_ctx)->swid); + + mlx5_accel_tls_del_tx_flow(priv->mdev, swid); + } else { + netdev_err(netdev, "unsupported direction %d\n", direction); + } +} + +static const struct tlsdev_ops mlx5e_tls_ops = { + .tls_dev_add = mlx5e_tls_add, + .tls_dev_del = mlx5e_tls_del, +}; + +void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) +{ + struct 
net_device *netdev = priv->netdev; + + if (!mlx5_accel_is_tls_device(priv->mdev)) + return; + + netdev->features |= NETIF_F_HW_TLS_TX; + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->tlsdev_ops = &mlx5e_tls_ops; +} + +int mlx5e_tls_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *tls = kzalloc(sizeof(*tls), GFP_KERNEL); + + if (!tls) + return -ENOMEM; + + priv->tls = tls; + return 0; +} + +void mlx5e_tls_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *tls = priv->tls; + + if (!tls) + return; + + kfree(tls); + priv->tls = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h new file mode 100644 index 000000000000..b6162178f621 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
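One subtlety in mlx5e_tls_set_flow() above: an AF_INET6 socket whose peer address is IPv4-mapped (::ffff:a.b.c.d) carries IPv4 on the wire, so it is programmed as an IPv4 flow and does not need the MLX5_ACCEL_TLS_IPV6 capability. That check in isolation (sketch mirroring the patch):

#include <net/ipv6.h>

/* Sketch: decide whether a socket's flow is IPv4 on the wire. */
static bool tls_flow_is_ipv4(const struct sock *sk)
{
	if (sk->sk_family == AF_INET)
		return true;
#if IS_ENABLED(CONFIG_IPV6)
	/* v4-mapped destination on a dual-stack socket */
	return !sk->sk_ipv6only &&
	       ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED;
#else
	return false;
#endif
}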
+ * + */ +#ifndef __MLX5E_TLS_H__ +#define __MLX5E_TLS_H__ + +#ifdef CONFIG_MLX5_EN_TLS + +#include <net/tls.h> +#include "en.h" + +struct mlx5e_tls_sw_stats { + atomic64_t tx_tls_drop_metadata; + atomic64_t tx_tls_drop_resync_alloc; + atomic64_t tx_tls_drop_no_sync_data; + atomic64_t tx_tls_drop_bypass_required; +}; + +struct mlx5e_tls { + struct mlx5e_tls_sw_stats sw_stats; +}; + +struct mlx5e_tls_offload_context { + struct tls_offload_context base; + u32 expected_seq; + __be32 swid; +}; + +static inline struct mlx5e_tls_offload_context * +mlx5e_get_tls_tx_context(struct tls_context *tls_ctx) +{ + BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context) > + TLS_OFFLOAD_CONTEXT_SIZE); + return container_of(tls_offload_ctx(tls_ctx), + struct mlx5e_tls_offload_context, + base); +} + +void mlx5e_tls_build_netdev(struct mlx5e_priv *priv); +int mlx5e_tls_init(struct mlx5e_priv *priv); +void mlx5e_tls_cleanup(struct mlx5e_priv *priv); + +int mlx5e_tls_get_count(struct mlx5e_priv *priv); +int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data); +int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); + +#else + +static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { } +static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { } +static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } +static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; } +static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; } + +#endif + +#endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c new file mode 100644 index 000000000000..ad2790fb5966 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
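mlx5e_get_tls_tx_context() above uses a standard kernel embedding pattern: the driver context places the generic tls_offload_context as its first member, container_of() recovers the wrapper, and BUILD_BUG_ON() proves at compile time that the wrapper still fits the TLS_OFFLOAD_CONTEXT_SIZE slot reserved by the core. The pattern as a toy (all names hypothetical):

#include <linux/build_bug.h>
#include <linux/kernel.h>

#define CORE_CTX_SLOT_SIZE 128 /* toy value for the slot the core reserves */

struct core_ctx {
	int core_state;
};

struct drv_ctx {
	struct core_ctx base; /* must stay the first member */
	u32 swid;
};

static struct drv_ctx *to_drv_ctx(struct core_ctx *core)
{
	/* compile-time proof that drv_ctx still fits the reserved slot */
	BUILD_BUG_ON(sizeof(struct drv_ctx) > CORE_CTX_SLOT_SIZE);
	return container_of(core, struct drv_ctx, base);
}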
+ * + */ + +#include "en_accel/tls.h" +#include "en_accel/tls_rxtx.h" + +#define SYNDROME_OFFLOAD_REQUIRED 32 +#define SYNDROME_SYNC 33 + +struct sync_info { + u64 rcd_sn; + s32 sync_len; + int nr_frags; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct mlx5e_tls_metadata { + /* One byte of syndrome followed by 3 bytes of swid */ + __be32 syndrome_swid; + __be16 first_seq; + /* packet type ID field */ + __be16 ethertype; +} __packed; + +static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid) +{ + struct mlx5e_tls_metadata *pet; + struct ethhdr *eth; + + if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata))) + return -ENOMEM; + + eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata)); + skb->mac_header -= sizeof(struct mlx5e_tls_metadata); + pet = (struct mlx5e_tls_metadata *)(eth + 1); + + memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata), + 2 * ETH_ALEN); + + eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); + pet->syndrome_swid = htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid; + + return 0; +} + +static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context *context, + u32 tcp_seq, struct sync_info *info) +{ + int remaining, i = 0, ret = -EINVAL; + struct tls_record_info *record; + unsigned long flags; + s32 sync_size; + + spin_lock_irqsave(&context->base.lock, flags); + record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn); + + if (unlikely(!record)) + goto out; + + sync_size = tcp_seq - tls_record_start_seq(record); + info->sync_len = sync_size; + if (unlikely(sync_size < 0)) { + if (tls_record_is_start_marker(record)) + goto done; + + goto out; + } + + remaining = sync_size; + while (remaining > 0) { + info->frags[i] = record->frags[i]; + __skb_frag_ref(&info->frags[i]); + remaining -= skb_frag_size(&info->frags[i]); + + if (remaining < 0) + skb_frag_size_add(&info->frags[i], remaining); + + i++; + } + info->nr_frags = i; +done: + ret = 0; +out: + spin_unlock_irqrestore(&context->base.lock, flags); + return ret; +} + +static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb, + struct sk_buff *nskb, u32 tcp_seq, + int headln, __be64 rcd_sn) +{ + struct mlx5e_tls_metadata *pet; + u8 syndrome = SYNDROME_SYNC; + struct iphdr *iph; + struct tcphdr *th; + int data_len, mss; + + nskb->dev = skb->dev; + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, skb_network_offset(skb)); + skb_set_transport_header(nskb, skb_transport_offset(skb)); + memcpy(nskb->data, skb->data, headln); + memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn)); + + iph = ip_hdr(nskb); + iph->tot_len = htons(nskb->len - skb_network_offset(nskb)); + th = tcp_hdr(nskb); + data_len = nskb->len - headln; + tcp_seq -= data_len; + th->seq = htonl(tcp_seq); + + mss = nskb->dev->mtu - (headln - skb_network_offset(nskb)); + skb_shinfo(nskb)->gso_size = 0; + if (data_len > mss) { + skb_shinfo(nskb)->gso_size = mss; + skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss); + } + skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + + pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr)); + memcpy(pet, &syndrome, sizeof(syndrome)); + pet->first_seq = htons(tcp_seq); + + /* MLX5 devices don't care about the checksum partial start, offset + * and pseudo header + */ + nskb->ip_summed = CHECKSUM_PARTIAL; + + nskb->xmit_more = 1; + nskb->queue_mapping = skb->queue_mapping; +} + +static struct sk_buff * +mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context *context, + struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_tx_wqe 
**wqe, + u16 *pi, + struct mlx5e_tls *tls) +{ + u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); + struct sync_info info; + struct sk_buff *nskb; + int linear_len = 0; + int headln; + int i; + + sq->stats.tls_ooo++; + + if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) { + /* We might get here if a retransmission reaches the driver + * after the relevant record is acked. + * It should be safe to drop the packet in this case + */ + atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data); + goto err_out; + } + + if (unlikely(info.sync_len < 0)) { + u32 payload; + + headln = skb_transport_offset(skb) + tcp_hdrlen(skb); + payload = skb->len - headln; + if (likely(payload <= -info.sync_len)) + /* SKB payload doesn't require offload + */ + return skb; + + atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required); + goto err_out; + } + + if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { + atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata); + goto err_out; + } + + headln = skb_transport_offset(skb) + tcp_hdrlen(skb); + linear_len += headln + sizeof(info.rcd_sn); + nskb = alloc_skb(linear_len, GFP_ATOMIC); + if (unlikely(!nskb)) { + atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc); + goto err_out; + } + + context->expected_seq = tcp_seq + skb->len - headln; + skb_put(nskb, linear_len); + for (i = 0; i < info.nr_frags; i++) + skb_shinfo(nskb)->frags[i] = info.frags[i]; + + skb_shinfo(nskb)->nr_frags = info.nr_frags; + nskb->data_len = info.sync_len; + nskb->len += info.sync_len; + sq->stats.tls_resync_bytes += nskb->len; + mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, + cpu_to_be64(info.rcd_sn)); + mlx5e_sq_xmit(sq, nskb, *wqe, *pi); + mlx5e_sq_fetch_wqe(sq, wqe, pi); + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} + +struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, + u16 *pi) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_tls_offload_context *context; + struct tls_context *tls_ctx; + u32 expected_seq; + int datalen; + u32 skb_seq; + + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + goto out; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + if (!datalen) + goto out; + + tls_ctx = tls_get_ctx(skb->sk); + if (unlikely(tls_ctx->netdev != netdev)) + goto out; + + skb_seq = ntohl(tcp_hdr(skb)->seq); + context = mlx5e_get_tls_tx_context(tls_ctx); + expected_seq = context->expected_seq; + + if (unlikely(expected_seq != skb_seq)) { + skb = mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls); + goto out; + } + + if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { + atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata); + dev_kfree_skb_any(skb); + skb = NULL; + goto out; + } + + context->expected_seq = skb_seq + datalen; +out: + return skb; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h new file mode 100644 index 000000000000..405dfd302225 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_TLS_RXTX_H__ +#define __MLX5E_TLS_RXTX_H__ + +#ifdef CONFIG_MLX5_EN_TLS + +#include <linux/skbuff.h> +#include "en.h" + +struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, + u16 *pi); + +#endif /* CONFIG_MLX5_EN_TLS */ + +#endif /* __MLX5E_TLS_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c new file mode 100644 index 000000000000..01468ec27446 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
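Back in tls_rxtx.c, mlx5e_tls_add_metadata() packs the one-byte syndrome and the three-byte swid into a single __be32; because the swid is below BIT(24) and already stored big-endian, a plain OR composes the two fields. An equivalent, illustrative helper taking a host-order swid:

/* Illustrative packing helper. For syndrome = 32 (SYNDROME_OFFLOAD_REQUIRED)
 * and swid = 0x000102, the wire bytes are 20 00 01 02:
 *   htonl(32 << 24)   -> 20 00 00 00
 *   htonl(0x000102)   -> 00 00 01 02
 *   OR                -> 20 00 01 02
 * i.e. [syndrome][swid byte 2][swid byte 1][swid byte 0]
 */
static __be32 pack_syndrome_swid(u8 syndrome, u32 swid)
{
	return htonl((u32)syndrome << 24) | htonl(swid);
}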
+ * + */ + +#include <linux/ethtool.h> +#include <net/sock.h> + +#include "en.h" +#include "accel/tls.h" +#include "fpga/sdk.h" +#include "en_accel/tls.h" + +static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) }, +}; + +#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ + atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) + +#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc) + +int mlx5e_tls_get_count(struct mlx5e_priv *priv) +{ + if (!priv->tls) + return 0; + + return NUM_TLS_SW_COUNTERS; +} + +int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + unsigned int i, idx = 0; + + if (!priv->tls) + return 0; + + for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_tls_sw_stats_desc[i].format); + + return NUM_TLS_SW_COUNTERS; +} + +int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + int i, idx = 0; + + if (!priv->tls) + return 0; + + for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, + mlx5e_tls_sw_stats_desc, i); + + return NUM_TLS_SW_COUNTERS; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 602851ab5b14..d67adf70a97b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -33,16 +33,30 @@ #include <linux/net_dim.h> #include "en.h" +static void +mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, + struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) +{ + mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); + dim->state = NET_DIM_START_MEASURE; +} + void mlx5e_rx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, - work); + struct net_dim *dim = container_of(work, struct net_dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); - struct net_dim_cq_moder cur_profile = net_dim_get_profile(dim->mode, - dim->profile_ix); + struct net_dim_cq_moder cur_moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); - mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq, - cur_profile.usec, cur_profile.pkts); + mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); +} - dim->state = NET_DIM_START_MEASURE; +void mlx5e_tx_dim_work(struct work_struct *work) +{ + struct net_dim *dim = container_of(work, struct net_dim, work); + struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); + struct net_dim_cq_moder cur_moder = + net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + + mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 37fd0245b6c1..2b786c4d3dab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -389,14 +389,20 @@ static int mlx5e_set_channels(struct net_device *dev, int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { + struct net_dim_cq_moder *rx_moder, *tx_moder; + if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return 
-EOPNOTSUPP; - coal->rx_coalesce_usecs = priv->channels.params.rx_cq_moderation.usec; - coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts; - coal->tx_coalesce_usecs = priv->channels.params.tx_cq_moderation.usec; - coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts; - coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled; + rx_moder = &priv->channels.params.rx_cq_moderation; + coal->rx_coalesce_usecs = rx_moder->usec; + coal->rx_max_coalesced_frames = rx_moder->pkts; + coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled; + + tx_moder = &priv->channels.params.tx_cq_moderation; + coal->tx_coalesce_usecs = tx_moder->usec; + coal->tx_max_coalesced_frames = tx_moder->pkts; + coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled; return 0; } @@ -438,6 +444,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { + struct net_dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; @@ -463,11 +470,15 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); new_channels.params = priv->channels.params; - new_channels.params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; - new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; - new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; - new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; - new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + rx_moder = &new_channels.params.rx_cq_moderation; + rx_moder->usec = coal->rx_coalesce_usecs; + rx_moder->pkts = coal->rx_max_coalesced_frames; + new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + + tx_moder = &new_channels.params.tx_cq_moderation; + tx_moder->usec = coal->tx_coalesce_usecs; + tx_moder->pkts = coal->tx_max_coalesced_frames; + new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -475,7 +486,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, } /* we are opened */ - reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; + reset = (!!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled) || + (!!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled); + if (!reset) { mlx5e_set_priv_channels_coalesce(priv, coal); priv->channels.params = new_channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b29c1d93f058..553eb63753ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -35,13 +35,16 @@ #include <linux/mlx5/fs.h> #include <net/vxlan.h> #include <linux/bpf.h> +#include <net/page_pool.h> #include "eswitch.h" #include "en.h" #include "en_tc.h" #include "en_rep.h" #include "en_accel/ipsec.h" #include "en_accel/ipsec_rxtx.h" +#include "en_accel/tls.h" #include "accel/ipsec.h" +#include "accel/tls.h" #include "vxlan.h" struct mlx5e_rq_param { @@ -389,10 +392,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *rqp, struct mlx5e_rq *rq) { + struct page_pool_params pp_params = { 0 }; struct mlx5_core_dev *mdev = c->mdev; void *rqc 
= rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); - u32 byte_count; + u32 byte_count, pool_size; int npages; int wq_sz; int err; @@ -432,9 +436,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); + pool_size = 1 << params->log_rq_mtu_frames; switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + + pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params); rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -512,6 +519,32 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->mkey_be = c->mkey_be; } + /* Create a page_pool and register it with rxq */ + pp_params.order = rq->buff.page_order; + pp_params.flags = 0; /* No internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = cpu_to_node(c->cpu); + pp_params.dev = c->pdev; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog; + * since page_pool does not handle DMA mapping, there is no + * required state to clear. And page_pool gracefully handles + * elevated refcnt. + */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + kfree(rq->wqe.frag_info); + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_rq_wq_destroy; + } + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); + if (err) + goto err_rq_wq_destroy; + for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); @@ -548,6 +581,8 @@ err_rq_wq_destroy: if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); + if (rq->page_pool) + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); return err; @@ -561,6 +596,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); + if (rq->page_pool) + page_pool_destroy(rq->page_pool); switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -979,6 +1016,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); + if (mlx5_accel_is_tls_device(c->priv->mdev)) + set_bit(MLX5E_SQ_STATE_TLS, &sq->state); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); @@ -990,6 +1029,9 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; + INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work); + sq->dim.mode = params->tx_cq_moderation.cq_period_mode; + sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; return 0; @@ -1153,6 +1195,9 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, if (tx_rate) mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); + if (params->tx_dim_enabled) + sq->state |= BIT(MLX5E_SQ_STATE_AM); + return 0; err_free_txqsq: @@ -4049,18 +4094,48 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) { - params->tx_cq_moderation.cq_period_mode = cq_period_mode; + struct net_dim_cq_moder moder; - params->tx_cq_moderation.pkts = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; -
params->tx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} +static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +{ + struct net_dim_cq_moder moder; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - params->tx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) +{ + return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : + NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->tx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); + } else { + params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); + } MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, params->tx_cq_moderation.cq_period_mode == @@ -4069,28 +4144,12 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) { - params->rx_cq_moderation.cq_period_mode = cq_period_mode; - - params->rx_cq_moderation.pkts = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - params->rx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - params->rx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; - if (params->rx_dim_enabled) { - switch (cq_period_mode) { - case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: - params->rx_cq_moderation = - net_dim_get_def_profile(NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE); - break; - case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: - default: - params->rx_cq_moderation = - net_dim_get_def_profile(NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE); - } + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); + } else { + params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); } MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, @@ -4154,6 +4213,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode); mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); @@ -4320,6 +4380,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) #endif mlx5e_ipsec_build_netdev(priv); + mlx5e_tls_build_netdev(priv); } static void mlx5e_create_q_counters(struct mlx5e_priv *priv) @@ -4361,12 +4422,16 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev, err = mlx5e_ipsec_init(priv); if (err) mlx5_core_err(mdev, "IPSec 
initialization failed, %d\n", err); + err = mlx5e_tls_init(priv); + if (err) + mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); mlx5e_build_nic_netdev(netdev); mlx5e_vxlan_init(priv); } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { + mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); mlx5e_vxlan_cleanup(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 176645762e49..7bbf0db27a01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -37,6 +37,7 @@ #include <linux/bpf_trace.h> #include <net/busy_poll.h> #include <net/ip6_checksum.h> +#include <net/page_pool.h> #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -221,7 +222,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, if (mlx5e_rx_cache_get(rq, dma_info)) return 0; - dma_info->page = dev_alloc_pages(rq->buff.page_order); + dma_info->page = page_pool_dev_alloc_pages(rq->page_pool); if (unlikely(!dma_info->page)) return -ENOMEM; @@ -236,15 +237,26 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, return 0; } +static void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), + rq->buff.map_dir); +} + void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle) { - if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) - return; + if (likely(recycle)) { + if (mlx5e_rx_cache_put(rq, dma_info)) + return; - dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), - rq->buff.map_dir); - put_page(dma_info->page); + mlx5e_page_dma_unmap(rq, dma_info); + page_pool_recycle_direct(rq->page_pool, dma_info->page); + } else { + mlx5e_page_dma_unmap(rq, dma_info); + put_page(dma_info->page); + } } static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq, @@ -800,9 +812,10 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len) { - const struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); + struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); struct xdp_buff xdp; u32 act; + int err; if (!prog) return false; @@ -823,6 +836,15 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp))) trace_xdp_exception(rq->netdev, prog, act); return true; + case XDP_REDIRECT: + /* When XDP enabled then page-refcnt==1 here */ + err = xdp_do_redirect(rq->netdev, &xdp, prog); + if (!err) { + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); + rq->xdpsq.db.redirect_flush = true; + mlx5e_page_dma_unmap(rq, di); + } + return true; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: @@ -868,6 +890,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, frag_size, DMA_FROM_DEVICE); + prefetchw(va); /* xdp_frame data area */ prefetch(data); wi->offset += frag_size; @@ -1140,6 +1163,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) xdpsq->db.doorbell = false; } + if (xdpsq->db.redirect_flush) { + xdp_do_flush_map(); + xdpsq->db.redirect_flush = false; + } + mlx5_cqwq_update_db_record(&cq->wq); /* ensure cq space is freed before enabling more cqes */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index b08c94422907..e17919c0af08 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -32,6 +32,7 @@ #include "en.h" #include "en_accel/ipsec.h" +#include "en_accel/tls.h" static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, @@ -43,6 +44,12 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) }, + +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, +#endif + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, @@ -161,6 +168,10 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_csum_partial_inner += sq_stats->csum_partial_inner; s->tx_csum_none += sq_stats->csum_none; s->tx_csum_partial += sq_stats->csum_partial; +#ifdef CONFIG_MLX5_EN_TLS + s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; +#endif } } @@ -1065,6 +1076,22 @@ static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv) mlx5e_ipsec_update_stats(priv); } +static int mlx5e_grp_tls_get_num_stats(struct mlx5e_priv *priv) +{ + return mlx5e_tls_get_count(priv); +} + +static int mlx5e_grp_tls_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN); +} + +static int mlx5e_grp_tls_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + return idx + mlx5e_tls_get_stats(priv, data + idx); +} + static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, @@ -1268,6 +1295,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .update_stats = mlx5e_grp_ipsec_update_stats, }, { + .get_num_stats = mlx5e_grp_tls_get_num_stats, + .fill_strings = mlx5e_grp_tls_fill_strings, + .fill_stats = mlx5e_grp_tls_fill_stats, + }, + { .get_num_stats = mlx5e_grp_channels_get_num_stats, .fill_strings = mlx5e_grp_channels_fill_strings, .fill_stats = mlx5e_grp_channels_fill_stats, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 53111a2df587..a36e6a87066b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -93,6 +93,11 @@ struct mlx5e_sw_stats { u64 rx_cache_waive; u64 ch_eq_rearm; +#ifdef CONFIG_MLX5_EN_TLS + u64 tx_tls_ooo; + u64 tx_tls_resync_bytes; +#endif + /* Special handling counters */ u64 link_down_events_phy; }; @@ -194,6 +199,10 @@ struct mlx5e_sq_stats { u64 csum_partial_inner; u64 added_vlan_packets; u64 nop; +#ifdef CONFIG_MLX5_EN_TLS + u64 tls_ooo; + u64 tls_resync_bytes; +#endif /* less likely accessed in data path */ u64 csum_none; u64 stopped; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5532aa3675c7..047614d2eda2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -35,12 +35,21 @@ #include <net/dsfield.h> #include "en.h" #include "ipoib/ipoib.h" -#include 
"en_accel/ipsec_rxtx.h" +#include "en_accel/en_accel.h" #include "lib/clock.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS + +#ifndef CONFIG_MLX5_EN_TLS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ MLX5E_SQ_NOPS_ROOM) +#else +/* TLS offload requires MLX5E_SQ_STOP_ROOM to have + * enough room for a resync SKB, a normal SKB and a NOP + */ +#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\ + MLX5E_SQ_NOPS_ROOM) +#endif static inline void mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) @@ -329,8 +338,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, } } -static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tx_wqe *wqe, u16 pi) +netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_tx_wqe *wqe, u16 pi) { struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; @@ -401,21 +410,19 @@ err_drop: netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)]; - struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_tx_wqe *wqe; + struct mlx5e_txqsq *sq; + u16 pi; - memset(wqe, 0, sizeof(*wqe)); + sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + mlx5e_sq_fetch_wqe(sq, &wqe, &pi); -#ifdef CONFIG_MLX5_EN_IPSEC - if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) { - skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb); - if (unlikely(!skb)) - return NETDEV_TX_OK; - } +#ifdef CONFIG_MLX5_ACCEL + /* might send skbs and update wqe and pi */ + skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi); + if (unlikely(!skb)) + return NETDEV_TX_OK; #endif - return mlx5e_sq_xmit(sq, skb, wqe, pi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index f292bb346985..900661d45c8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -44,6 +44,30 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) return cpumask_test_cpu(current_cpu, aff); } +static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) +{ + struct net_dim_sample dim_sample; + + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_AM))) + return; + + net_dim_sample(sq->cq.event_ctr, sq->stats.packets, sq->stats.bytes, + &dim_sample); + net_dim(&sq->dim, dim_sample); +} + +static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) +{ + struct net_dim_sample dim_sample; + + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_AM))) + return; + + net_dim_sample(rq->cq.event_ctr, rq->stats.packets, rq->stats.bytes, + &dim_sample); + net_dim(&rq->dim, dim_sample); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -75,18 +99,13 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) if (unlikely(!napi_complete_done(napi, work_done))) return work_done; - for (i = 0; i < c->num_tc; i++) + for (i = 0; i < c->num_tc; i++) { + mlx5e_handle_tx_dim(&c->sq[i]); mlx5e_cq_arm(&c->sq[i].cq); - - if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM)) { - struct net_dim_sample dim_sample; - net_dim_sample(c->rq.cq.event_ctr, - c->rq.stats.packets, - c->rq.stats.bytes, - &dim_sample); - net_dim(&c->rq.dim, dim_sample); } + mlx5e_handle_rx_dim(&c->rq); + mlx5e_cq_arm(&c->rq.cq); mlx5e_cq_arm(&c->icosq.cq); diff 
--git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 82405ed84725..3e2355c8df3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -53,6 +53,7 @@ struct mlx5_fpga_device { } conn_res; struct mlx5_fpga_ipsec *ipsec; + struct mlx5_fpga_tls *tls; }; #define mlx5_fpga_dbg(__adev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 0f5da499a223..3c4f1f326e13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -43,9 +43,6 @@ #include "fpga/sdk.h" #include "fpga/core.h" -#define SBU_QP_QUEUE_SIZE 8 -#define MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC (60 * 1000) - enum mlx5_fpga_ipsec_cmd_status { MLX5_FPGA_IPSEC_CMD_PENDING, MLX5_FPGA_IPSEC_CMD_SEND_FAIL, @@ -258,7 +255,7 @@ static int mlx5_fpga_ipsec_cmd_wait(void *ctx) { struct mlx5_fpga_ipsec_cmd_context *context = ctx; unsigned long timeout = - msecs_to_jiffies(MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC); + msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC); int res; res = wait_for_completion_timeout(&context->complete, timeout); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h index baa537e54a49..a0573cc2fc9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h @@ -41,6 +41,8 @@ * DOC: Innova SDK * This header defines the in-kernel API for Innova FPGA client drivers. */ +#define SBU_QP_QUEUE_SIZE 8 +#define MLX5_FPGA_CMD_TIMEOUT_MSEC (60 * 1000) enum mlx5_fpga_access_type { MLX5_FPGA_ACCESS_TYPE_I2C = 0x0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c new file mode 100644 index 000000000000..21048013826c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -0,0 +1,562 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
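SBU_QP_QUEUE_SIZE and the command timeout move from fpga/ipsec.c into fpga/sdk.h above so the two SBU clients (IPsec and the new TLS support) share one definition. A sketch of the common wait pattern a client builds on (assumed context: the caller initialized the completion before sending its command):

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

/* Wait for a previously sent SBU command using the shared timeout. */
static int sbu_wait_cmd(struct completion *done)
{
	unsigned long t = msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC);

	return wait_for_completion_timeout(done, t) ? 0 : -ETIMEDOUT;
}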
+ * + */ + +#include <linux/mlx5/device.h> +#include "fpga/tls.h" +#include "fpga/cmd.h" +#include "fpga/sdk.h" +#include "fpga/core.h" +#include "accel/tls.h" + +struct mlx5_fpga_tls_command_context; + +typedef void (*mlx5_fpga_tls_command_complete) + (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, + struct mlx5_fpga_tls_command_context *ctx, + struct mlx5_fpga_dma_buf *resp); + +struct mlx5_fpga_tls_command_context { + struct list_head list; + /* There is no guarantee on the order between the TX completion + * and the command response. + * The TX completion is going to touch cmd->buf even in + * the case of successful transmission. + * So instead of requiring separate allocations for cmd + * and cmd->buf we've decided to use a reference counter + */ + refcount_t ref; + struct mlx5_fpga_dma_buf buf; + mlx5_fpga_tls_command_complete complete; +}; + +static void +mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx) +{ + if (refcount_dec_and_test(&ctx->ref)) + kfree(ctx); +} + +static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *resp) +{ + struct mlx5_fpga_conn *conn = fdev->tls->conn; + struct mlx5_fpga_tls_command_context *ctx; + struct mlx5_fpga_tls *tls = fdev->tls; + unsigned long flags; + + spin_lock_irqsave(&tls->pending_cmds_lock, flags); + ctx = list_first_entry(&tls->pending_cmds, + struct mlx5_fpga_tls_command_context, list); + list_del(&ctx->list); + spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); + ctx->complete(conn, fdev, ctx, resp); +} + +static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, + u8 status) +{ + struct mlx5_fpga_tls_command_context *ctx = + container_of(buf, struct mlx5_fpga_tls_command_context, buf); + + mlx5_fpga_tls_put_command_ctx(ctx); + + if (unlikely(status)) + mlx5_fpga_tls_cmd_complete(fdev, NULL); +} + +static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_tls_command_context *cmd, + mlx5_fpga_tls_command_complete complete) +{ + struct mlx5_fpga_tls *tls = fdev->tls; + unsigned long flags; + int ret; + + refcount_set(&cmd->ref, 2); + cmd->complete = complete; + cmd->buf.complete = mlx5_fpga_cmd_send_complete; + + spin_lock_irqsave(&tls->pending_cmds_lock, flags); + /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock + * to make sure commands are inserted to the tls->pending_cmds list + * and the command QP in the same order. + */ + ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf); + if (likely(!ret)) + list_add_tail(&cmd->list, &tls->pending_cmds); + else + complete(tls->conn, fdev, cmd, NULL); + spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); +} + +/* Start of context identifiers range (inclusive) */ +#define SWID_START 0 +/* End of context identifiers range (exclusive) */ +#define SWID_END BIT(24) + +static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, + void *ptr) +{ + int ret; + + /* TLS metadata format is 1 byte for syndrome followed + * by 3 bytes of swid (software ID) + * swid must not exceed 3 bytes. 
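As the comment in struct mlx5_fpga_tls_command_context explains, a command buffer has two independent last users, the TX completion (which touches cmd->buf even on success) and the response handler, and their relative order is not guaranteed; mlx5_fpga_tls_cmd_send() therefore starts the refcount at 2, and whichever path drops the last reference frees the context. The lifetime rule as a toy sketch:

#include <linux/refcount.h>
#include <linux/slab.h>

/* Toy two-owner object: one reference for the TX-completion path and
 * one for the response path; the last ctx_put() frees it.
 */
struct two_owner_ctx {
	refcount_t ref;
	/* ... payload ... */
};

static void ctx_put(struct two_owner_ctx *ctx)
{
	if (refcount_dec_and_test(&ctx->ref))
		kfree(ctx);
}

static void ctx_send(struct two_owner_ctx *ctx)
{
	refcount_set(&ctx->ref, 2); /* TX completion + response handler */
	/* ... queue for transmission; each completion path calls ctx_put() ... */
}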
+ * See tls_rxtx.c:insert_pet() for details + */ + BUILD_BUG_ON((SWID_END - 1) & 0xFF000000); + + idr_preload(GFP_KERNEL); + spin_lock_irq(idr_spinlock); + ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC); + spin_unlock_irq(idr_spinlock); + idr_preload_end(); + + return ret; +} + +static void mlx5_fpga_tls_release_swid(struct idr *idr, + spinlock_t *idr_spinlock, u32 swid) +{ + unsigned long flags; + + spin_lock_irqsave(idr_spinlock, flags); + idr_remove(idr, swid); + spin_unlock_irqrestore(idr_spinlock, flags); +} + +struct mlx5_teardown_stream_context { + struct mlx5_fpga_tls_command_context cmd; + u32 swid; +}; + +static void +mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_tls_command_context *cmd, + struct mlx5_fpga_dma_buf *resp) +{ + struct mlx5_teardown_stream_context *ctx = + container_of(cmd, struct mlx5_teardown_stream_context, cmd); + + if (resp) { + u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); + + if (syndrome) + mlx5_fpga_err(fdev, + "Teardown stream failed with syndrome = %d", + syndrome); + else + mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr, + &fdev->tls->idr_spinlock, + ctx->swid); + } + mlx5_fpga_tls_put_command_ctx(cmd); +} + +static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd) +{ + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow, + MLX5_BYTE_OFF(tls_flow, ipv6)); + + MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6)); + MLX5_SET(tls_cmd, cmd, direction_sx, + MLX5_GET(tls_flow, flow, direction_sx)); +} + +void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, void *flow, + u32 swid, gfp_t flags) +{ + struct mlx5_teardown_stream_context *ctx; + struct mlx5_fpga_dma_buf *buf; + void *cmd; + + ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags); + if (!ctx) + return; + + buf = &ctx->cmd.buf; + cmd = (ctx + 1); + MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); + MLX5_SET(tls_cmd, cmd, swid, swid); + + mlx5_fpga_tls_flow_to_cmd(flow, cmd); + kfree(flow); + + buf->sg[0].data = cmd; + buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; + + ctx->swid = swid; + mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, + mlx5_fpga_tls_teardown_completion); +} + +void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid, + gfp_t flags) +{ + struct mlx5_fpga_tls *tls = mdev->fpga->tls; + void *flow; + + rcu_read_lock(); + flow = idr_find(&tls->tx_idr, swid); + rcu_read_unlock(); + + if (!flow) { + mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", + swid); + return; + } + + mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); +} + +enum mlx5_fpga_setup_stream_status { + MLX5_FPGA_CMD_PENDING, + MLX5_FPGA_CMD_SEND_FAILED, + MLX5_FPGA_CMD_RESPONSE_RECEIVED, + MLX5_FPGA_CMD_ABANDONED, +}; + +struct mlx5_setup_stream_context { + struct mlx5_fpga_tls_command_context cmd; + atomic_t status; + u32 syndrome; + struct completion comp; +}; + +static void +mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_tls_command_context *cmd, + struct mlx5_fpga_dma_buf *resp) +{ + struct mlx5_setup_stream_context *ctx = + container_of(cmd, struct mlx5_setup_stream_context, cmd); + int status = MLX5_FPGA_CMD_SEND_FAILED; + void *tls_cmd = ctx + 1; + + /* If we failed to send to command resp == NULL */ + if (resp) { + ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); + status = MLX5_FPGA_CMD_RESPONSE_RECEIVED; + } + + status = atomic_xchg_release(&ctx->status, status); + if 
(likely(status != MLX5_FPGA_CMD_ABANDONED)) { + complete(&ctx->comp); + return; + } + + mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n", + ctx->syndrome); + + if (!ctx->syndrome) { + /* The process was killed while waiting for the context to be + * added, and the add completed successfully. + * We need to destroy the HW context, and we can't reuse + * the command context because we might not have received + * the tx completion yet. + */ + mlx5_fpga_tls_del_tx_flow(fdev->mdev, + MLX5_GET(tls_cmd, tls_cmd, swid), + GFP_ATOMIC); + } + + mlx5_fpga_tls_put_command_ctx(cmd); +} + +static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev, + struct mlx5_setup_stream_context *ctx) +{ + struct mlx5_fpga_dma_buf *buf; + void *cmd = ctx + 1; + int status, ret = 0; + + buf = &ctx->cmd.buf; + buf->sg[0].data = cmd; + buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; + MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM); + + init_completion(&ctx->comp); + atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING); + ctx->syndrome = -1; + + mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, + mlx5_fpga_tls_setup_completion); + wait_for_completion_killable(&ctx->comp); + + status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED); + if (unlikely(status == MLX5_FPGA_CMD_PENDING)) + /* ctx is going to be released in mlx5_fpga_tls_setup_completion */ + return -EINTR; + + if (unlikely(ctx->syndrome)) + ret = -ENOMEM; + + mlx5_fpga_tls_put_command_ctx(&ctx->cmd); + return ret; +} + +static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg; + + mlx5_fpga_tls_cmd_complete(fdev, buf); +} + +bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev) +{ + if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) + return false; + + if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != + MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) + return false; + + if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != + MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS) + return false; + + if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0) + return false; + + return true; +} + +static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev, + u32 *p_caps) +{ + int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap); + u32 caps = 0; + void *buf; + + buf = kzalloc(cap_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf); + if (err) + goto out; + + if (MLX5_GET(tls_extended_cap, buf, tx)) + caps |= MLX5_ACCEL_TLS_TX; + if (MLX5_GET(tls_extended_cap, buf, rx)) + caps |= MLX5_ACCEL_TLS_RX; + if (MLX5_GET(tls_extended_cap, buf, tls_v12)) + caps |= MLX5_ACCEL_TLS_V12; + if (MLX5_GET(tls_extended_cap, buf, tls_v13)) + caps |= MLX5_ACCEL_TLS_V13; + if (MLX5_GET(tls_extended_cap, buf, lro)) + caps |= MLX5_ACCEL_TLS_LRO; + if (MLX5_GET(tls_extended_cap, buf, ipv6)) + caps |= MLX5_ACCEL_TLS_IPV6; + + if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128)) + caps |= MLX5_ACCEL_TLS_AES_GCM128; + if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256)) + caps |= MLX5_ACCEL_TLS_AES_GCM256; + + *p_caps = caps; + err = 0; +out: + kfree(buf); + return err; +} + +int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + struct mlx5_fpga_conn_attr init_attr = {0}; + struct mlx5_fpga_conn *conn; + struct mlx5_fpga_tls *tls; + int err = 0; + + if (!mlx5_fpga_is_tls_device(mdev) || !fdev) + return 0; + + tls = kzalloc(sizeof(*tls), GFP_KERNEL); + if (!tls) + return -ENOMEM; + + err = 
mlx5_fpga_tls_get_caps(fdev, &tls->caps); + if (err) + goto error; + + if (!(tls->caps & (MLX5_ACCEL_TLS_TX | MLX5_ACCEL_TLS_V12 | + MLX5_ACCEL_TLS_AES_GCM128))) { + err = -ENOTSUPP; + goto error; + } + + init_attr.rx_size = SBU_QP_QUEUE_SIZE; + init_attr.tx_size = SBU_QP_QUEUE_SIZE; + init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb; + init_attr.cb_arg = fdev; + conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); + if (IS_ERR(conn)) { + err = PTR_ERR(conn); + mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n", + err); + goto error; + } + + tls->conn = conn; + spin_lock_init(&tls->pending_cmds_lock); + INIT_LIST_HEAD(&tls->pending_cmds); + + idr_init(&tls->tx_idr); + spin_lock_init(&tls->idr_spinlock); + fdev->tls = tls; + return 0; + +error: + kfree(tls); + return err; +} + +void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + if (!fdev || !fdev->tls) + return; + + mlx5_fpga_sbu_conn_destroy(fdev->tls->conn); + kfree(fdev->tls); + fdev->tls = NULL; +} + +static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd, + struct tls_crypto_info *info, + __be64 *rcd_sn) +{ + struct tls12_crypto_info_aes_gcm_128 *crypto_info = + (struct tls12_crypto_info_aes_gcm_128 *)info; + + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv), + crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key), + crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); + + /* in AES-GCM 128 we need to write the key twice */ + memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) + + TLS_CIPHER_AES_GCM_128_KEY_SIZE, + crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); + + MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128); +} + +static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps, + struct tls_crypto_info *crypto_info) +{ + __be64 rcd_sn; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (!(caps & MLX5_ACCEL_TLS_AES_GCM128)) + return -EINVAL; + mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, u32 swid, + u32 tcp_sn) +{ + u32 caps = mlx5_fpga_tls_device_caps(mdev); + struct mlx5_setup_stream_context *ctx; + int ret = -ENOMEM; + size_t cmd_size; + void *cmd; + + cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx); + ctx = kzalloc(cmd_size, GFP_KERNEL); + if (!ctx) + goto out; + + cmd = ctx + 1; + ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info); + if (ret) + goto free_ctx; + + mlx5_fpga_tls_flow_to_cmd(flow, cmd); + + MLX5_SET(tls_cmd, cmd, swid, swid); + MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn); + + return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx); + +free_ctx: + kfree(ctx); +out: + return ret; +} + +int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid) +{ + struct mlx5_fpga_tls *tls = mdev->fpga->tls; + int ret = -ENOMEM; + u32 swid; + + ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, &tls->idr_spinlock, flow); + if (ret < 0) + return ret; + + swid = ret; + MLX5_SET(tls_flow, flow, direction_sx, 1); + + ret = mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid, + start_offload_tcp_sn); + if (ret && ret != -EINTR) + goto free_swid; + + *p_swid = swid; + return 0; +free_swid: + 
mlx5_fpga_tls_release_swid(&tls->tx_idr, &tls->idr_spinlock, swid); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h new file mode 100644 index 000000000000..800a214e4e49 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_FPGA_TLS_H__ +#define __MLX5_FPGA_TLS_H__ + +#include <linux/mlx5/driver.h> + +#include <net/tls.h> +#include "fpga/core.h" + +struct mlx5_fpga_tls { + struct list_head pending_cmds; + spinlock_t pending_cmds_lock; /* Protects pending_cmds */ + u32 caps; + struct mlx5_fpga_conn *conn; + + struct idr tx_idr; + spinlock_t idr_spinlock; /* protects the IDR */ +}; + +int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn, u32 *p_swid); + +void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid, + gfp_t flags); + +bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev); +int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev); + +static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev) +{ + return mdev->fpga->tls->caps; +} + +#endif /* __MLX5_FPGA_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 63a8ea31601c..b865597630ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -60,6 +60,7 @@ #include "fpga/core.h" #include "fpga/ipsec.h" #include "accel/ipsec.h" +#include "accel/tls.h" #include "lib/clock.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); @@ -1190,6 +1191,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_ipsec_start; } + err = mlx5_accel_tls_init(dev); + if (err) { + dev_err(&pdev->dev, "TLS device start failed %d\n", err); + goto err_tls_start; + } + err = mlx5_init_fs(dev); if (err) { dev_err(&pdev->dev, "Failed to init flow steering\n"); @@ -1231,6 +1238,9 @@ err_sriov: mlx5_cleanup_fs(dev); err_fs: + 
mlx5_accel_tls_cleanup(dev); + +err_tls_start: mlx5_accel_ipsec_cleanup(dev); err_ipsec_start: @@ -1306,6 +1316,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); + mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index 479511cf79bc..8da91b023b13 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -424,10 +424,15 @@ MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_rdq_sz, 0x04, 24, 8); MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_rdqs, 0x04, 0, 8); /* cmd_mbox_query_aq_cap_log_max_cq_sz - * Log (base 2) of max CQEs allowed on CQ. + * Log (base 2) of the Maximum CQEs allowed in a CQ for CQEv0 and CQEv1. */ MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cq_sz, 0x08, 24, 8); +/* cmd_mbox_query_aq_cap_log_max_cqv2_sz + * Log (base 2) of the Maximum CQEs allowed in a CQ for CQEv2. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cqv2_sz, 0x08, 16, 8); + /* cmd_mbox_query_aq_cap_max_num_cqs * Maximum number of CQs. */ @@ -662,6 +667,12 @@ MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1); */ MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1); +/* cmd_mbox_config_set_cqe_version + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_version, 0x08, 0, 1); + /* cmd_mbox_config_profile_max_vepa_channels * Maximum number of VEPA channels per port (0 through 16) * 0 - multi-channel VEPA is disabled @@ -841,6 +852,14 @@ MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_type, MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_properties, 0x60, 0, 8, 0x08, 0x00, false); +/* cmd_mbox_config_profile_cqe_version + * CQE version: + * 0: CQE version is 0 + * 1: CQE version is either 1 or 2 + * CQE ver 1 or 2 is configured by Completion Queue Context field cqe_ver. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, cqe_version, 0xB0, 0, 8); + /* ACCESS_REG - Access EMAD Supported Register * ---------------------------------- * OpMod == 0 (N/A), INMmod == 0 (N/A) @@ -1032,11 +1051,15 @@ static inline int mlxsw_cmd_sw2hw_cq(struct mlxsw_core *mlxsw_core, 0, cq_number, in_mbox, MLXSW_CMD_MBOX_SIZE); } -/* cmd_mbox_sw2hw_cq_cv +enum mlxsw_cmd_mbox_sw2hw_cq_cqe_ver { + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1, + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2, +}; + +/* cmd_mbox_sw2hw_cq_cqe_ver * CQE Version. - * 0 - CQE Version 0, 1 - CQE Version 1 */ -MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cv, 0x00, 28, 4); +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cqe_ver, 0x00, 28, 4); /* cmd_mbox_sw2hw_cq_c_eqn * Event Queue this CQ reports completion events to. 
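The cmd.h hunk above replaces the old single-bit "cv" field with an explicit cqe_ver field and enumerated version encodings, and the pci.c changes below select a version per CQ. A minimal editorial sketch of how the new setter is meant to be driven (not part of the patch; example_cq_mbox_set_ver is a hypothetical helper, and enum mlxsw_pci_cqe_v is added in pci_hw.h later in this patch):

/* Hypothetical helper illustrating the new cqe_ver mailbox field. */
static void example_cq_mbox_set_ver(char *mbox, enum mlxsw_pci_cqe_v v)
{
	/* CQE v0 is the default and needs no mailbox encoding. */
	if (v == MLXSW_PCI_CQE_V1)
		mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
				MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1);
	else if (v == MLXSW_PCI_CQE_V2)
		mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
				MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2);
}

The same branch appears in mlxsw_pci_cq_init() in the pci.c hunk that follows.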
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 3a9381977d6d..db794a1a3a7e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -117,6 +117,7 @@ struct mlxsw_pci_queue { struct { u32 comp_sdq_count; u32 comp_rdq_count; + enum mlxsw_pci_cqe_v v; } cq; struct { u32 ev_cmd_count; @@ -155,6 +156,8 @@ struct mlxsw_pci { } cmd; struct mlxsw_bus_info bus_info; const struct pci_device_id *id; + enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */ + u8 num_sdq_cqs; /* Number of CQs used for SDQs */ }; static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q) @@ -202,24 +205,6 @@ static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit) return owner_bit != !!(q->consumer_counter & q->count); } -static char * -mlxsw_pci_queue_sw_elem_get(struct mlxsw_pci_queue *q, - u32 (*get_elem_owner_func)(const char *)) -{ - struct mlxsw_pci_queue_elem_info *elem_info; - char *elem; - bool owner_bit; - - elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); - elem = elem_info->elem; - owner_bit = get_elem_owner_func(elem); - if (mlxsw_pci_elem_hw_owned(q, owner_bit)) - return NULL; - q->consumer_counter++; - rmb(); /* make sure we read owned bit before the rest of elem */ - return elem; -} - static struct mlxsw_pci_queue_type_group * mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci, enum mlxsw_pci_queue_type q_type) @@ -494,6 +479,17 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci, } } +static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + q->u.cq.v = mlxsw_pci->max_cqe_ver; + + /* For SDQ it is pointless to use CQEv2, so use CQEv1 instead */ + if (q->u.cq.v == MLXSW_PCI_CQE_V2 && + q->num < mlxsw_pci->num_sdq_cqs) + q->u.cq.v = MLXSW_PCI_CQE_V1; +} + static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { @@ -505,10 +501,16 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, for (i = 0; i < q->count; i++) { char *elem = mlxsw_pci_queue_elem_get(q, i); - mlxsw_pci_cqe_owner_set(elem, 1); + mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1); } - mlxsw_cmd_mbox_sw2hw_cq_cv_set(mbox, 0); /* CQE ver 0 */ + if (q->u.cq.v == MLXSW_PCI_CQE_V1) + mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1); + else if (q->u.cq.v == MLXSW_PCI_CQE_V2) + mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2); + mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM); mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0); mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count)); @@ -559,7 +561,7 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q, u16 consumer_counter_limit, - char *cqe) + enum mlxsw_pci_cqe_v cqe_v, char *cqe) { struct pci_dev *pdev = mlxsw_pci->pdev; struct mlxsw_pci_queue_elem_info *elem_info; @@ -579,10 +581,11 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); - if (mlxsw_pci_cqe_lag_get(cqe)) { + if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) { rx_info.is_lag = true; - rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe); - rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe); + rx_info.u.lag_id = 
mlxsw_pci_cqe_lag_id_get(cqe_v, cqe); + rx_info.lag_port_index = + mlxsw_pci_cqe_lag_subport_get(cqe_v, cqe); } else { rx_info.is_lag = false; rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe); @@ -591,7 +594,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); byte_count = mlxsw_pci_cqe_byte_count_get(cqe); - if (mlxsw_pci_cqe_crc_get(cqe)) + if (mlxsw_pci_cqe_crc_get(cqe_v, cqe)) byte_count -= ETH_FCS_LEN; skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); @@ -608,7 +611,18 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) { - return mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_cqe_owner_get); + struct mlxsw_pci_queue_elem_info *elem_info; + char *elem; + bool owner_bit; + + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + elem = elem_info->elem; + owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem); + if (mlxsw_pci_elem_hw_owned(q, owner_bit)) + return NULL; + q->consumer_counter++; + rmb(); /* make sure we read owned bit before the rest of elem */ + return elem; } static void mlxsw_pci_cq_tasklet(unsigned long data) @@ -621,8 +635,8 @@ static void mlxsw_pci_cq_tasklet(unsigned long data) while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); - u8 sendq = mlxsw_pci_cqe_sr_get(cqe); - u8 dqn = mlxsw_pci_cqe_dqn_get(cqe); + u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); + u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); if (sendq) { struct mlxsw_pci_queue *sdq; @@ -636,7 +650,7 @@ static void mlxsw_pci_cq_tasklet(unsigned long data) rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, - wqe_counter, cqe); + wqe_counter, q->u.cq.v, cqe); q->u.cq.comp_rdq_count++; } if (++items == credits) @@ -648,6 +662,18 @@ static void mlxsw_pci_cq_tasklet(unsigned long data) } } +static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) +{ + return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT : + MLXSW_PCI_CQE01_COUNT; +} + +static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q) +{ + return q->u.cq.v == MLXSW_PCI_CQE_V2 ? 
MLXSW_PCI_CQE2_SIZE : + MLXSW_PCI_CQE01_SIZE; +} + static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { @@ -696,7 +722,18 @@ static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe) static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q) { - return mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_eqe_owner_get); + struct mlxsw_pci_queue_elem_info *elem_info; + char *elem; + bool owner_bit; + + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + elem = elem_info->elem; + owner_bit = mlxsw_pci_eqe_owner_get(elem); + if (mlxsw_pci_elem_hw_owned(q, owner_bit)) + return NULL; + q->consumer_counter++; + rmb(); /* make sure we read owned bit before the rest of elem */ + return elem; } static void mlxsw_pci_eq_tasklet(unsigned long data) @@ -749,11 +786,15 @@ static void mlxsw_pci_eq_tasklet(unsigned long data) struct mlxsw_pci_queue_ops { const char *name; enum mlxsw_pci_queue_type type; + void (*pre_init)(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q); int (*init)(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q); void (*fini)(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q); void (*tasklet)(unsigned long data); + u16 (*elem_count_f)(const struct mlxsw_pci_queue *q); + u8 (*elem_size_f)(const struct mlxsw_pci_queue *q); u16 elem_count; u8 elem_size; }; @@ -776,11 +817,12 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = { static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = { .type = MLXSW_PCI_QUEUE_TYPE_CQ, + .pre_init = mlxsw_pci_cq_pre_init, .init = mlxsw_pci_cq_init, .fini = mlxsw_pci_cq_fini, .tasklet = mlxsw_pci_cq_tasklet, - .elem_count = MLXSW_PCI_CQE_COUNT, - .elem_size = MLXSW_PCI_CQE_SIZE + .elem_count_f = mlxsw_pci_cq_elem_count, + .elem_size_f = mlxsw_pci_cq_elem_size }; static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = { @@ -800,10 +842,15 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, int i; int err; - spin_lock_init(&q->lock); q->num = q_num; - q->count = q_ops->elem_count; - q->elem_size = q_ops->elem_size; + if (q_ops->pre_init) + q_ops->pre_init(mlxsw_pci, q); + + spin_lock_init(&q->lock); + q->count = q_ops->elem_count_f ? q_ops->elem_count_f(q) : + q_ops->elem_count; + q->elem_size = q_ops->elem_size_f ? 
q_ops->elem_size_f(q) : + q_ops->elem_size; q->type = q_ops->type; q->pci = mlxsw_pci; @@ -832,7 +879,7 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, elem_info = mlxsw_pci_queue_elem_info_get(q, i); elem_info->elem = - __mlxsw_pci_queue_elem_get(q, q_ops->elem_size, i); + __mlxsw_pci_queue_elem_get(q, q->elem_size, i); } mlxsw_cmd_mbox_zero(mbox); @@ -912,6 +959,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) u8 rdq_log2sz; u8 num_cqs; u8 cq_log2sz; + u8 cqv2_log2sz; u8 num_eqs; u8 eq_log2sz; int err; @@ -927,6 +975,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) rdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_rdq_sz_get(mbox); num_cqs = mlxsw_cmd_mbox_query_aq_cap_max_num_cqs_get(mbox); cq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cq_sz_get(mbox); + cqv2_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cqv2_sz_get(mbox); num_eqs = mlxsw_cmd_mbox_query_aq_cap_max_num_eqs_get(mbox); eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox); @@ -938,12 +987,16 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) if ((1 << sdq_log2sz != MLXSW_PCI_WQE_COUNT) || (1 << rdq_log2sz != MLXSW_PCI_WQE_COUNT) || - (1 << cq_log2sz != MLXSW_PCI_CQE_COUNT) || + (1 << cq_log2sz != MLXSW_PCI_CQE01_COUNT) || + (mlxsw_pci->max_cqe_ver == MLXSW_PCI_CQE_V2 && + (1 << cqv2_log2sz != MLXSW_PCI_CQE2_COUNT)) || (1 << eq_log2sz != MLXSW_PCI_EQE_COUNT)) { dev_err(&pdev->dev, "Unsupported number of async queue descriptors\n"); return -EINVAL; } + mlxsw_pci->num_sdq_cqs = num_sdqs; + err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops, num_eqs); if (err) { @@ -1184,6 +1237,11 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i, &profile->swid_config[i]); + if (mlxsw_pci->max_cqe_ver > MLXSW_PCI_CQE_V0) { + mlxsw_cmd_mbox_config_profile_set_cqe_version_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_cqe_version_set(mbox, 1); + } + return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox); } @@ -1378,6 +1436,21 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_query_resources; + if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V2) && + MLXSW_CORE_RES_GET(mlxsw_core, CQE_V2)) + mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V2; + else if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V1) && + MLXSW_CORE_RES_GET(mlxsw_core, CQE_V1)) + mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V1; + else if ((MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0) && + MLXSW_CORE_RES_GET(mlxsw_core, CQE_V0)) || + !MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0)) { + mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V0; + } else { + dev_err(&pdev->dev, "Invalid supported CQE version combination reported\n"); + goto err_cqe_v_check; + } + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, res); if (err) goto err_config_profile; @@ -1400,6 +1473,7 @@ err_request_eq_irq: mlxsw_pci_aqs_fini(mlxsw_pci); err_aqs_init: err_config_profile: +err_cqe_v_check: err_query_resources: err_boardinfo: mlxsw_pci_fw_area_fini(mlxsw_pci); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index fb082ad21b00..963155f6a17a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -82,10 +82,12 @@ #define MLXSW_PCI_AQ_PAGES 8 #define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES) #define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ -#define 
MLXSW_PCI_CQE_SIZE 16 /* 16 bytes per element */ +#define MLXSW_PCI_CQE01_SIZE 16 /* 16 bytes per element */ +#define MLXSW_PCI_CQE2_SIZE 32 /* 32 bytes per element */ #define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ #define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) -#define MLXSW_PCI_CQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE) +#define MLXSW_PCI_CQE01_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE) +#define MLXSW_PCI_CQE2_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE2_SIZE) #define MLXSW_PCI_EQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE) #define MLXSW_PCI_EQE_UPDATE_COUNT 0x80 @@ -126,10 +128,48 @@ MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false); */ MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); +enum mlxsw_pci_cqe_v { + MLXSW_PCI_CQE_V0, + MLXSW_PCI_CQE_V1, + MLXSW_PCI_CQE_V2, +}; + +#define mlxsw_pci_cqe_item_helpers(name, v0, v1, v2) \ +static inline u32 mlxsw_pci_cqe_##name##_get(enum mlxsw_pci_cqe_v v, char *cqe) \ +{ \ + switch (v) { \ + default: \ + case MLXSW_PCI_CQE_V0: \ + return mlxsw_pci_cqe##v0##_##name##_get(cqe); \ + case MLXSW_PCI_CQE_V1: \ + return mlxsw_pci_cqe##v1##_##name##_get(cqe); \ + case MLXSW_PCI_CQE_V2: \ + return mlxsw_pci_cqe##v2##_##name##_get(cqe); \ + } \ +} \ +static inline void mlxsw_pci_cqe_##name##_set(enum mlxsw_pci_cqe_v v, \ + char *cqe, u32 val) \ +{ \ + switch (v) { \ + default: \ + case MLXSW_PCI_CQE_V0: \ + mlxsw_pci_cqe##v0##_##name##_set(cqe, val); \ + break; \ + case MLXSW_PCI_CQE_V1: \ + mlxsw_pci_cqe##v1##_##name##_set(cqe, val); \ + break; \ + case MLXSW_PCI_CQE_V2: \ + mlxsw_pci_cqe##v2##_##name##_set(cqe, val); \ + break; \ + } \ +} + /* pci_cqe_lag * Packet arrives from a port which is a LAG */ -MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); +MLXSW_ITEM32(pci, cqe0, lag, 0x00, 23, 1); +MLXSW_ITEM32(pci, cqe12, lag, 0x00, 24, 1); +mlxsw_pci_cqe_item_helpers(lag, 0, 12, 12); /* pci_cqe_system_port/lag_id * When lag=0: System port on which the packet was received @@ -138,8 +178,12 @@ MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); * bits [3:0] sub_port on which the packet was received */ MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); -MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12); -MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4); +MLXSW_ITEM32(pci, cqe0, lag_id, 0x00, 4, 12); +MLXSW_ITEM32(pci, cqe12, lag_id, 0x00, 0, 16); +mlxsw_pci_cqe_item_helpers(lag_id, 0, 12, 12); +MLXSW_ITEM32(pci, cqe0, lag_subport, 0x00, 0, 4); +MLXSW_ITEM32(pci, cqe12, lag_subport, 0x00, 16, 8); +mlxsw_pci_cqe_item_helpers(lag_subport, 0, 12, 12); /* pci_cqe_wqe_counter * WQE count of the WQEs completed on the associated dqn @@ -162,28 +206,38 @@ MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 9); * Length include CRC. Indicates the length field includes * the packet's CRC. */ -MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1); +MLXSW_ITEM32(pci, cqe0, crc, 0x0C, 8, 1); +MLXSW_ITEM32(pci, cqe12, crc, 0x0C, 9, 1); +mlxsw_pci_cqe_item_helpers(crc, 0, 12, 12); /* pci_cqe_e * CQE with Error. */ -MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1); +MLXSW_ITEM32(pci, cqe0, e, 0x0C, 7, 1); +MLXSW_ITEM32(pci, cqe12, e, 0x00, 27, 1); +mlxsw_pci_cqe_item_helpers(e, 0, 12, 12); /* pci_cqe_sr * 1 - Send Queue * 0 - Receive Queue */ -MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1); +MLXSW_ITEM32(pci, cqe0, sr, 0x0C, 6, 1); +MLXSW_ITEM32(pci, cqe12, sr, 0x00, 26, 1); +mlxsw_pci_cqe_item_helpers(sr, 0, 12, 12); /* pci_cqe_dqn * Descriptor Queue (DQ) Number. 
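 * (Editorial note: CQE v1/v2 widen this field from 5 to 6 bits, hence
 * the split cqe0/cqe12 definitions below.)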
*/ -MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5); +MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5); +MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6); +mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12); /* pci_cqe_owner * Ownership bit. */ -MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1); +MLXSW_ITEM32(pci, cqe01, owner, 0x0C, 0, 1); +MLXSW_ITEM32(pci, cqe2, owner, 0x1C, 0, 1); +mlxsw_pci_cqe_item_helpers(owner, 01, 01, 2); /* pci_eqe_event_type * Event type. diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 087aad52c195..fd9299ccec72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -43,6 +43,9 @@ enum mlxsw_res_id { MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE, MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE, MLXSW_RES_ID_MAX_TRAP_GROUPS, + MLXSW_RES_ID_CQE_V0, + MLXSW_RES_ID_CQE_V1, + MLXSW_RES_ID_CQE_V2, MLXSW_RES_ID_COUNTER_POOL_SIZE, MLXSW_RES_ID_MAX_SPAN, MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, @@ -81,6 +84,9 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002, [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003, [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201, + [MLXSW_RES_ID_CQE_V0] = 0x2210, + [MLXSW_RES_ID_CQE_V1] = 0x2211, + [MLXSW_RES_ID_CQE_V2] = 0x2212, [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410, [MLXSW_RES_ID_MAX_SPAN] = 0x2420, [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ca38a30fbe91..94132f6cec61 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -441,29 +441,29 @@ static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL); } -int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, - u8 state) +enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 state) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - enum mlxsw_reg_spms_state spms_state; - char *spms_pl; - int err; - switch (state) { case BR_STATE_FORWARDING: - spms_state = MLXSW_REG_SPMS_STATE_FORWARDING; - break; + return MLXSW_REG_SPMS_STATE_FORWARDING; case BR_STATE_LEARNING: - spms_state = MLXSW_REG_SPMS_STATE_LEARNING; - break; + return MLXSW_REG_SPMS_STATE_LEARNING; case BR_STATE_LISTENING: /* fall-through */ case BR_STATE_DISABLED: /* fall-through */ case BR_STATE_BLOCKING: - spms_state = MLXSW_REG_SPMS_STATE_DISCARDING; - break; + return MLXSW_REG_SPMS_STATE_DISCARDING; default: BUG(); } +} + +int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + u8 state) +{ + enum mlxsw_reg_spms_state spms_state = mlxsw_sp_stp_spms_state(state); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *spms_pl; + int err; spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); if (!spms_pl) @@ -3666,6 +3666,15 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_lag_init; } + /* Initialize SPAN before router and switchdev, so that those components + * can call mlxsw_sp_span_respin(). 
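+ * (Editorial note: the teardown order is adjusted to match;
+ * mlxsw_sp_span_fini() now runs after mlxsw_sp_switchdev_fini() in
+ * both the error path and mlxsw_sp_fini() below.)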
+ */ + err = mlxsw_sp_span_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); + goto err_span_init; + } + err = mlxsw_sp_switchdev_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n"); @@ -3684,15 +3693,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_afa_init; } - err = mlxsw_sp_span_init(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); - goto err_span_init; - } - - /* Initialize router after SPAN is initialized, so that the FIB and - * neighbor event handlers can issue SPAN respin. - */ err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3739,14 +3739,14 @@ err_acl_init: err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: - mlxsw_sp_span_fini(mlxsw_sp); -err_span_init: mlxsw_sp_afa_fini(mlxsw_sp); err_afa_init: mlxsw_sp_counter_pool_fini(mlxsw_sp); err_counter_pool_init: mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: + mlxsw_sp_span_fini(mlxsw_sp); +err_span_init: mlxsw_sp_lag_fini(mlxsw_sp); err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); @@ -3768,10 +3768,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_acl_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); - mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); + mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 804d4d2c8031..4a519d8edec8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -364,6 +364,7 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, u32 maxrate); +enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state); int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, u8 state); int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 1904c0323d39..8028d221aece 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -442,7 +442,7 @@ struct mlxsw_sp_fib6_entry { struct mlxsw_sp_rt6 { struct list_head list; - struct rt6_info *rt; + struct fib6_info *rt; }; struct mlxsw_sp_lpm_tree { @@ -2770,9 +2770,9 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, struct in6_addr *gw; int ifindex, weight; - ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex; - weight = mlxsw_sp_rt6->rt->rt6i_nh_weight; - gw = &mlxsw_sp_rt6->rt->rt6i_gateway; + ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex; + weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight; + gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw; if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex, weight)) return false; @@ -2838,7 +2838,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) struct net_device *dev; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - dev = 
mlxsw_sp_rt6->rt->dst.dev; + dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev; val ^= dev->ifindex; } @@ -3834,11 +3834,11 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, for (i = 0; i < nh_grp->count; i++) { struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; - struct rt6_info *rt = mlxsw_sp_rt6->rt; + struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->dst.dev && + if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, - &rt->rt6i_gateway)) + &rt->fib6_nh.nh_gw)) return nh; continue; } @@ -3895,7 +3895,7 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; return; } @@ -3905,9 +3905,9 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); if (nh && nh->offloaded) - mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; else - mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -3920,9 +3920,9 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct rt6_info *rt = mlxsw_sp_rt6->rt; + struct fib6_info *rt = mlxsw_sp_rt6->rt; - rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4699,29 +4699,29 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } -static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt) +static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt) { /* Packets with link-local destination IP arriving to the router * are trapped to the CPU, so no need to program specific routes * for them. */ - if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL) + if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL) return true; /* Multicast routes aren't supported, so ignore them. Neighbour * Discovery packets are specifically trapped. */ - if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) + if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST) return true; /* Cloned routes are irrelevant in the forwarding path. */ - if (rt->rt6i_flags & RTF_CACHE) + if (rt->fib6_flags & RTF_CACHE) return true; return false; } -static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) +static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; @@ -4734,18 +4734,18 @@ static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) * memory. 
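 * (Editorial note: the reference is now taken with fib6_info_hold()
 * and dropped with fib6_info_release(), replacing the old
 * rt6_hold()/rt6_release() pair as part of the rt6_info to fib6_info
 * conversion.)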
*/ mlxsw_sp_rt6->rt = rt; - rt6_hold(rt); + fib6_info_hold(rt); return mlxsw_sp_rt6; } #if IS_ENABLED(CONFIG_IPV6) -static void mlxsw_sp_rt6_release(struct rt6_info *rt) +static void mlxsw_sp_rt6_release(struct fib6_info *rt) { - rt6_release(rt); + fib6_info_release(rt); } #else -static void mlxsw_sp_rt6_release(struct rt6_info *rt) +static void mlxsw_sp_rt6_release(struct fib6_info *rt) { } #endif @@ -4756,13 +4756,13 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) kfree(mlxsw_sp_rt6); } -static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt) +static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) { /* RTF_CACHE routes are ignored */ - return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; + return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; } -static struct rt6_info * +static struct fib6_info * mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) { return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, @@ -4771,7 +4771,7 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct rt6_info *nrt, bool replace) + const struct fib6_info *nrt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; @@ -4779,21 +4779,21 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, return NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same * virtual router. */ - if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) continue; - if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) break; - if (rt->rt6i_metric < nrt->rt6i_metric) + if (rt->fib6_metric < nrt->fib6_metric) continue; - if (rt->rt6i_metric == nrt->rt6i_metric && + if (rt->fib6_metric == nrt->fib6_metric && mlxsw_sp_fib6_rt_can_mp(rt)) return fib6_entry; - if (rt->rt6i_metric > nrt->rt6i_metric) + if (rt->fib6_metric > nrt->fib6_metric) break; } @@ -4802,7 +4802,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, static struct mlxsw_sp_rt6 * mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, - const struct rt6_info *rt) + const struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; @@ -4815,21 +4815,21 @@ mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, } static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, - const struct rt6_info *rt, + const struct fib6_info *rt, enum mlxsw_sp_ipip_type *ret) { - return rt->dst.dev && - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret); + return rt->fib6_nh.nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret); } static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, - const struct rt6_info *rt) + const struct fib6_info *rt) { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->dst.dev; + struct net_device *dev = rt->fib6_nh.nh_dev; struct mlxsw_sp_rif *rif; int err; @@ -4870,13 +4870,13 @@ static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp, static int 
mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, - const struct rt6_info *rt) + const struct fib6_info *rt) { - struct net_device *dev = rt->dst.dev; + struct net_device *dev = rt->fib6_nh.nh_dev; nh->nh_grp = nh_grp; - nh->nh_weight = rt->rt6i_nh_weight; - memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + nh->nh_weight = rt->fib6_nh.nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr)); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -4897,9 +4897,9 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, - const struct rt6_info *rt) + const struct fib6_info *rt) { - return rt->rt6i_flags & RTF_GATEWAY || + return rt->fib6_flags & RTF_GATEWAY || mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } @@ -4928,7 +4928,7 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); nh_grp->count = fib6_entry->nrt6; for (i = 0; i < nh_grp->count; i++) { - struct rt6_info *rt = mlxsw_sp_rt6->rt; + struct fib6_info *rt = mlxsw_sp_rt6->rt; nh = &nh_grp->nexthops[i]; err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); @@ -5040,7 +5040,7 @@ err_nexthop6_group_get: static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; int err; @@ -5068,7 +5068,7 @@ err_nexthop6_group_update: static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; @@ -5084,7 +5084,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, - const struct rt6_info *rt) + const struct fib6_info *rt) { /* Packets hitting RTF_REJECT routes need to be discarded by the * stack. We can rely on their destination device not having a @@ -5092,9 +5092,9 @@ static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, * local, which will cause them to be trapped with a lower * priority than packets that need to be locally received. 
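 * (Editorial note: the classification logic below is unchanged; only
 * the flag accesses move from rt6i_flags to fib6_flags.)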
*/ - if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) + if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; - else if (rt->rt6i_flags & RTF_REJECT) + else if (rt->fib6_flags & RTF_REJECT) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; @@ -5118,7 +5118,7 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; @@ -5168,25 +5168,25 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct rt6_info *nrt, bool replace) + const struct fib6_info *nrt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) continue; - if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) break; - if (replace && rt->rt6i_metric == nrt->rt6i_metric) { + if (replace && rt->fib6_metric == nrt->fib6_metric) { if (mlxsw_sp_fib6_rt_can_mp(rt) == mlxsw_sp_fib6_rt_can_mp(nrt)) return fib6_entry; if (mlxsw_sp_fib6_rt_can_mp(nrt)) fallback = fallback ?: fib6_entry; } - if (rt->rt6i_metric > nrt->rt6i_metric) + if (rt->fib6_metric > nrt->fib6_metric) return fallback ?: fib6_entry; } @@ -5198,7 +5198,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, bool replace) { struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; - struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); + struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); struct mlxsw_sp_fib6_entry *fib6_entry; fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); @@ -5213,9 +5213,9 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, struct mlxsw_sp_fib6_entry *last; list_for_each_entry(last, &fib_node->entry_list, common.list) { - struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last); + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last); - if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id) + if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id) break; fib6_entry = last; } @@ -5268,29 +5268,29 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, - const struct rt6_info *rt) + const struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; struct mlxsw_sp_vr *vr; - vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id); + vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id); if (!vr) return NULL; fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6); - fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr, - sizeof(rt->rt6i_dst.addr), - rt->rt6i_dst.plen); + fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen); if (!fib_node) return NULL; 
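	/* Editorial note: entries under the fib_node are ordered by table
	 * ID and metric; a match additionally requires that the entry
	 * already contains this rt, per mlxsw_sp_fib6_entry_rt_find().
	 */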
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id && - rt->rt6i_metric == iter_rt->rt6i_metric && + if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id && + rt->fib6_metric == iter_rt->fib6_metric && mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) return fib6_entry; } @@ -5316,7 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct rt6_info *rt, bool replace) + struct fib6_info *rt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -5325,16 +5325,16 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp->router->aborted) return 0; - if (rt->rt6i_src.plen) + if (rt->fib6_src.plen) return -EINVAL; if (mlxsw_sp_fib6_rt_should_ignore(rt)) return 0; - fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id, - &rt->rt6i_dst.addr, - sizeof(rt->rt6i_dst.addr), - rt->rt6i_dst.plen, + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, + &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen, MLXSW_SP_L3_PROTO_IPV6); if (IS_ERR(fib_node)) return PTR_ERR(fib_node); @@ -5373,7 +5373,7 @@ err_fib6_entry_nexthop_add: } static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -5836,7 +5836,7 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, fen6_info = container_of(info, struct fib6_entry_notifier_info, info); fib_work->fen6_info = *fen6_info; - rt6_hold(fib_work->fen6_info.rt); + fib6_info_hold(fib_work->fen6_info.rt); break; } } @@ -5882,24 +5882,24 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, switch (info->family) { case AF_INET: if (!fib4_rule_default(rule) && !rule->l3mdev) - err = -1; + err = -EOPNOTSUPP; break; case AF_INET6: if (!fib6_rule_default(rule) && !rule->l3mdev) - err = -1; + err = -EOPNOTSUPP; break; case RTNL_FAMILY_IPMR: if (!ipmr_rule_default(rule) && !rule->l3mdev) - err = -1; + err = -EOPNOTSUPP; break; case RTNL_FAMILY_IP6MR: if (!ip6mr_rule_default(rule) && !rule->l3mdev) - err = -1; + err = -EOPNOTSUPP; break; } if (err < 0) - NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload"); + NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported"); return err; } @@ -5926,8 +5926,15 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, case FIB_EVENT_RULE_DEL: err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp); - if (!err) - return NOTIFY_DONE; + if (!err || info->extack) + return notifier_from_errno(err); + break; + case FIB_EVENT_ENTRY_ADD: + if (router->aborted) { + NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route"); + return notifier_from_errno(-EINVAL); + } + break; } fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 65a77708ff61..cd9071ee19ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -32,6 +32,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <linux/if_bridge.h> #include <linux/list.h> #include <net/arp.h> #include <net/gre.h> @@ -39,8 +40,9 @@ #include <net/ip6_tunnel.h> #include "spectrum.h" -#include "spectrum_span.h" #include "spectrum_ipip.h" +#include "spectrum_span.h" +#include "spectrum_switchdev.h" int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) { @@ -167,6 +169,72 @@ mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp) return 0; } +static struct net_device * +mlxsw_sp_span_entry_bridge_8021q(const struct net_device *br_dev, + unsigned char *dmac, + u16 *p_vid) +{ + struct bridge_vlan_info vinfo; + struct net_device *edev; + u16 pvid; + + if (WARN_ON(br_vlan_get_pvid(br_dev, &pvid))) + return NULL; + if (!pvid) + return NULL; + + edev = br_fdb_find_port(br_dev, dmac, pvid); + if (!edev) + return NULL; + + if (br_vlan_get_info(edev, pvid, &vinfo)) + return NULL; + if (!(vinfo.flags & BRIDGE_VLAN_INFO_UNTAGGED)) + *p_vid = pvid; + return edev; +} + +static struct net_device * +mlxsw_sp_span_entry_bridge_8021d(const struct net_device *br_dev, + unsigned char *dmac) +{ + return br_fdb_find_port(br_dev, dmac, 0); +} + +static struct net_device * +mlxsw_sp_span_entry_bridge(const struct net_device *br_dev, + unsigned char dmac[ETH_ALEN], + u16 *p_vid) +{ + struct mlxsw_sp_bridge_port *bridge_port; + enum mlxsw_reg_spms_state spms_state; + struct mlxsw_sp_port *port; + struct net_device *dev; + u8 stp_state; + + if (br_vlan_enabled(br_dev)) + dev = mlxsw_sp_span_entry_bridge_8021q(br_dev, dmac, p_vid); + else + dev = mlxsw_sp_span_entry_bridge_8021d(br_dev, dmac); + if (!dev) + return NULL; + + port = mlxsw_sp_port_dev_lower_find(dev); + if (!port) + return NULL; + + bridge_port = mlxsw_sp_bridge_port_find(port->mlxsw_sp->bridge, dev); + if (!bridge_port) + return NULL; + + stp_state = mlxsw_sp_bridge_port_stp_state(bridge_port); + spms_state = mlxsw_sp_stp_spms_state(stp_state); + if (spms_state != MLXSW_REG_SPMS_STATE_FORWARDING) + return NULL; + + return dev; +} + static __maybe_unused int mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev, union mlxsw_sp_l3addr saddr, @@ -177,13 +245,22 @@ mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev, struct mlxsw_sp_span_parms *sparmsp) { unsigned char dmac[ETH_ALEN]; + u16 vid = 0; if (mlxsw_sp_l3addr_is_zero(gw)) gw = daddr; - if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) || - mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac)) - return mlxsw_sp_span_entry_unoffloadable(sparmsp); + if (!l3edev || mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac)) + goto unoffloadable; + + if (netif_is_bridge_master(l3edev)) { + l3edev = mlxsw_sp_span_entry_bridge(l3edev, dmac, &vid); + if (!l3edev) + goto unoffloadable; + } + + if (!mlxsw_sp_port_dev_check(l3edev)) + goto unoffloadable; sparmsp->dest_port = netdev_priv(l3edev); sparmsp->ttl = ttl; @@ -191,7 +268,11 @@ mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev, memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN); sparmsp->saddr = saddr; sparmsp->daddr = daddr; + sparmsp->vid = vid; return 0; + +unoffloadable: + return mlxsw_sp_span_entry_unoffloadable(sparmsp); } #if IS_ENABLED(CONFIG_NET_IPGRE) @@ -268,9 +349,10 @@ mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry, /* Create a new port analyzer entry for local_port. 
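 * (Editorial note: sparms.vid, resolved by the new bridge handling
 * above, is now packed into the MPAT register; a non-zero VID makes
 * the mirrored Ethernet header VLAN-tagged, hence the !!sparms.vid
 * argument below.)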
*/ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); + mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid); mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl, MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER, - sparms.dmac, false); + sparms.dmac, !!sparms.vid); mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl, sparms.ttl, sparms.smac, be32_to_cpu(sparms.saddr.addr4), @@ -368,9 +450,10 @@ mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry, /* Create a new port analyzer entry for local_port. */ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); + mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid); mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl, MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER, - sparms.dmac, false); + sparms.dmac, !!sparms.vid); mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac, sparms.saddr.addr6, sparms.daddr.addr6); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index 4b87ec20e658..14a6de904db1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -63,6 +63,7 @@ struct mlxsw_sp_span_parms { unsigned char smac[ETH_ALEN]; union mlxsw_sp_l3addr daddr; union mlxsw_sp_l3addr saddr; + u16 vid; }; struct mlxsw_sp_span_entry_ops; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 4ed01182a82c..8c9cf8ee9398 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -49,7 +49,9 @@ #include <linux/netlink.h> #include <net/switchdev.h> +#include "spectrum_span.h" #include "spectrum_router.h" +#include "spectrum_switchdev.h" #include "spectrum.h" #include "core.h" #include "reg.h" @@ -239,7 +241,7 @@ __mlxsw_sp_bridge_port_find(const struct mlxsw_sp_bridge_device *bridge_device, return NULL; } -static struct mlxsw_sp_bridge_port * +struct mlxsw_sp_bridge_port * mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge, struct net_device *brport_dev) { @@ -922,6 +924,9 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, break; } + if (switchdev_trans_ph_commit(trans)) + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); + return err; } @@ -1646,18 +1651,57 @@ mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, } } +struct mlxsw_sp_span_respin_work { + struct work_struct work; + struct mlxsw_sp *mlxsw_sp; +}; + +static void mlxsw_sp_span_respin_work(struct work_struct *work) +{ + struct mlxsw_sp_span_respin_work *respin_work = + container_of(work, struct mlxsw_sp_span_respin_work, work); + + rtnl_lock(); + mlxsw_sp_span_respin(respin_work->mlxsw_sp); + rtnl_unlock(); + kfree(respin_work); +} + +static void mlxsw_sp_span_respin_schedule(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_span_respin_work *respin_work; + + respin_work = kzalloc(sizeof(*respin_work), GFP_ATOMIC); + if (!respin_work) + return; + + INIT_WORK(&respin_work->work, mlxsw_sp_span_respin_work); + respin_work->mlxsw_sp = mlxsw_sp; + + mlxsw_core_schedule_work(&respin_work->work); +} + static int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + const struct switchdev_obj_port_vlan *vlan; int err = 0; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = 
mlxsw_sp_port_vlans_add(mlxsw_sp_port, - SWITCHDEV_OBJ_PORT_VLAN(obj), - trans); + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, trans); + + if (switchdev_trans_ph_commit(trans)) { + /* The event is emitted before the changes are actually + * applied to the bridge. Therefore schedule the respin + * call for later, so that the respin logic sees the + * updated bridge state. + */ + mlxsw_sp_span_respin_schedule(mlxsw_sp_port->mlxsw_sp); + } break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = mlxsw_sp_port_mdb_add(mlxsw_sp_port, @@ -1806,6 +1850,8 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, break; } + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); + return err; } @@ -2222,6 +2268,8 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work) switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; + if (!fdb_info->added_by_user) + break; err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true); if (err) break; @@ -2231,10 +2279,20 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work) break; case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; + if (!fdb_info->added_by_user) + break; mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false); break; + case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */ + case SWITCHDEV_FDB_DEL_TO_BRIDGE: + /* These events are only used to potentially update an existing + * SPAN mirror. + */ + break; } + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); + out: rtnl_unlock(); kfree(switchdev_work->fdb_info.addr); @@ -2263,7 +2321,9 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, switch (event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */ - case SWITCHDEV_FDB_DEL_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: /* fall through */ + case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */ + case SWITCHDEV_FDB_DEL_TO_BRIDGE: memcpy(&switchdev_work->fdb_info, ptr, sizeof(switchdev_work->fdb_info)); switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); @@ -2295,6 +2355,12 @@ static struct notifier_block mlxsw_sp_switchdev_notifier = { .notifier_call = mlxsw_sp_switchdev_event, }; +u8 +mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port) +{ + return bridge_port->stp_state; +} + static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h new file mode 100644 index 000000000000..bc44d5effc28 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 + * drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/netdevice.h> + +struct mlxsw_sp_bridge; +struct mlxsw_sp_bridge_port; + +struct mlxsw_sp_bridge_port * +mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge, + struct net_device *brport_dev); + +u8 mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 29b4e5f8c102..65f0791cae0c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1214,45 +1214,83 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return 0; } -static int -wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - enum br_mask br_mask, bool swap) +static const struct jmp_code_map { + enum br_mask br_mask; + bool swap; +} jmp_code_map[] = { + [BPF_JGT >> 4] = { BR_BLO, true }, + [BPF_JGE >> 4] = { BR_BHS, false }, + [BPF_JLT >> 4] = { BR_BLO, false }, + [BPF_JLE >> 4] = { BR_BHS, true }, + [BPF_JSGT >> 4] = { BR_BLT, true }, + [BPF_JSGE >> 4] = { BR_BGE, false }, + [BPF_JSLT >> 4] = { BR_BLT, false }, + [BPF_JSLE >> 4] = { BR_BGE, true }, +}; + +static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta) +{ + unsigned int op; + + op = BPF_OP(meta->insn.code) >> 4; + /* br_mask of 0 is BR_BEQ which we don't use in jump code table */ + if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) || + !jmp_code_map[op].br_mask, + "no code found for jump instruction")) + return NULL; + + return &jmp_code_map[op]; +} + +static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ + const struct jmp_code_map *code; + enum alu_op alu_op, carry_op; u8 reg = insn->dst_reg * 2; swreg tmp_reg; + code = nfp_jmp_code_get(meta); + if (!code) + return -EINVAL; + + alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB; + carry_op = meta->jump_neg_op ? 
ALU_OP_ADD_C : ALU_OP_SUB_C; + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); - if (!swap) - emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg); + if (!code->swap) + emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg); else - emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg)); + emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg)); tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); - if (!swap) + if (!code->swap) emit_alu(nfp_prog, reg_none(), - reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg); + reg_a(reg + 1), carry_op, tmp_reg); else emit_alu(nfp_prog, reg_none(), - tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1)); + tmp_reg, carry_op, reg_a(reg + 1)); - emit_br(nfp_prog, br_mask, insn->off, 0); + emit_br(nfp_prog, code->br_mask, insn->off, 0); return 0; } -static int -wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - enum br_mask br_mask, bool swap) +static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; + const struct jmp_code_map *code; u8 areg, breg; + code = nfp_jmp_code_get(meta); + if (!code) + return -EINVAL; + areg = insn->dst_reg * 2; breg = insn->src_reg * 2; - if (swap) { + if (code->swap) { areg ^= breg; breg ^= areg; areg ^= breg; @@ -1261,7 +1299,7 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); emit_alu(nfp_prog, reg_none(), reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); - emit_br(nfp_prog, br_mask, insn->off, 0); + emit_br(nfp_prog, code->br_mask, insn->off, 0); return 0; } @@ -1400,7 +1438,7 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) if (!load_lm_ptr) return 0; - emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0); + emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0); wrp_nops(nfp_prog, 3); return 0; @@ -2283,46 +2321,6 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } -static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true); -} - -static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false); -} - -static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); -} - -static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); -} - -static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true); -} - -static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false); -} - -static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false); -} - -static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true); -} - static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -2392,46 +2390,6 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } -static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true); -} - -static int jge_reg(struct nfp_prog *nfp_prog, 
struct nfp_insn_meta *meta) -{ - return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false); -} - -static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); -} - -static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); -} - -static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true); -} - -static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false); -} - -static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false); -} - -static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true); -} - static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); @@ -2520,25 +2478,25 @@ static const instr_cb_t instr_cb[256] = { [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, [BPF_JMP | BPF_JA | BPF_K] = jump, [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, - [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, - [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, - [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm, - [BPF_JMP | BPF_JLE | BPF_K] = jle_imm, - [BPF_JMP | BPF_JSGT | BPF_K] = jsgt_imm, - [BPF_JMP | BPF_JSGE | BPF_K] = jsge_imm, - [BPF_JMP | BPF_JSLT | BPF_K] = jslt_imm, - [BPF_JMP | BPF_JSLE | BPF_K] = jsle_imm, + [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm, + [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm, [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, - [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg, - [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, - [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg, - [BPF_JMP | BPF_JLE | BPF_X] = jle_reg, - [BPF_JMP | BPF_JSGT | BPF_X] = jsgt_reg, - [BPF_JMP | BPF_JSGE | BPF_X] = jsge_reg, - [BPF_JMP | BPF_JSLT | BPF_X] = jslt_reg, - [BPF_JMP | BPF_JSLE | BPF_X] = jsle_reg, + [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg, + [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg, [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, [BPF_JMP | BPF_CALL] = call, @@ -2777,6 +2735,54 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) } } +/* abs(insn.imm) will fit better into unrestricted reg immediate - + * convert add/sub of a negative number into a sub/add of a positive one. 
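+ *
+ * Illustrative example (not in the original comment): "r0 += -8",
+ * i.e. BPF_ALU64 | BPF_ADD | BPF_K with imm == -8, becomes
+ * BPF_ALU64 | BPF_SUB | BPF_K with imm == 8. For the jumps handled
+ * here (BPF_JGE/JSGE/JLT/JSLT) only the immediate is negated and
+ * jump_neg_op is set, which makes cmp_imm() pick
+ * ALU_OP_ADD/ALU_OP_ADD_C instead of ALU_OP_SUB/ALU_OP_SUB_C.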
+ */ +static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + if (meta->skip) + continue; + + if (BPF_CLASS(insn.code) != BPF_ALU && + BPF_CLASS(insn.code) != BPF_ALU64 && + BPF_CLASS(insn.code) != BPF_JMP) + continue; + if (BPF_SRC(insn.code) != BPF_K) + continue; + if (insn.imm >= 0) + continue; + + if (BPF_CLASS(insn.code) == BPF_JMP) { + switch (BPF_OP(insn.code)) { + case BPF_JGE: + case BPF_JSGE: + case BPF_JLT: + case BPF_JSLT: + meta->jump_neg_op = true; + break; + default: + continue; + } + } else { + if (BPF_OP(insn.code) == BPF_ADD) + insn.code = BPF_CLASS(insn.code) | BPF_SUB; + else if (BPF_OP(insn.code) == BPF_SUB) + insn.code = BPF_CLASS(insn.code) | BPF_ADD; + else + continue; + + meta->insn.code = insn.code | BPF_K; + } + + meta->insn.imm = -insn.imm; + } +} + /* Remove masking after load since our load guarantees this is not needed */ static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) { @@ -3212,6 +3218,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) { nfp_bpf_opt_reg_init(nfp_prog); + nfp_bpf_opt_neg_add_sub(nfp_prog); nfp_bpf_opt_ld_mask(nfp_prog); nfp_bpf_opt_ld_shift(nfp_prog); nfp_bpf_opt_ldst_gather(nfp_prog); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 4981c8944ca3..68b5d326483d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -236,6 +236,7 @@ struct nfp_bpf_reg_state { * @xadd_over_16bit: 16bit immediate is not guaranteed * @xadd_maybe_16bit: 16bit immediate is possible * @jmp_dst: destination info for jump instructions + * @jump_neg_op: jump instruction has inverted immediate, use ADD instead of SUB * @func_id: function id for call instructions * @arg1: arg1 for call instructions * @arg2: arg2 for call instructions @@ -264,7 +265,10 @@ struct nfp_insn_meta { bool xadd_maybe_16bit; }; /* jump */ - struct nfp_insn_meta *jmp_dst; + struct { + struct nfp_insn_meta *jmp_dst; + bool jump_neg_op; + }; /* function calls */ struct { u32 func_id; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index c67e1b54c614..733ff53cc601 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -47,6 +47,7 @@ struct net_device; struct nfp_app; +#define NFP_FL_STATS_CTX_DONT_CARE cpu_to_be32(0xffffffff) #define NFP_FL_STATS_ENTRY_RS BIT(20) #define NFP_FL_STATS_ELEM_RS 4 #define NFP_FL_REPEATED_HASH_MAX BIT(17) @@ -189,9 +190,11 @@ struct nfp_fl_payload { spinlock_t lock; /* lock stats */ struct nfp_fl_stats stats; __be32 nfp_tun_ipv4_addr; + struct net_device *ingress_dev; char *unmasked_data; char *mask_data; char *action_data; + bool ingress_offload; }; struct nfp_fl_stats_frame { @@ -216,12 +219,14 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow, struct nfp_fl_payload *nfp_flow); int nfp_compile_flow_metadata(struct nfp_app *app, struct tc_cls_flower_offload *flow, - struct nfp_fl_payload *nfp_flow); + struct nfp_fl_payload *nfp_flow, + struct net_device *netdev); int nfp_modify_flow_metadata(struct nfp_app *app, struct nfp_fl_payload *nfp_flow); struct nfp_fl_payload * -nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie, + struct net_device 
*netdev, __be32 host_ctx); struct nfp_fl_payload * nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index db977cf8e933..21668aa435e8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -99,14 +99,18 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id) /* Must be called with either RTNL or rcu_read_lock */ struct nfp_fl_payload * -nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie) +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie, + struct net_device *netdev, __be32 host_ctx) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_payload *flower_entry; hash_for_each_possible_rcu(priv->flow_table, flower_entry, link, tc_flower_cookie) - if (flower_entry->tc_flower_cookie == tc_flower_cookie) + if (flower_entry->tc_flower_cookie == tc_flower_cookie && + (!netdev || flower_entry->ingress_dev == netdev) && + (host_ctx == NFP_FL_STATS_CTX_DONT_CARE || + flower_entry->meta.host_ctx_id == host_ctx)) return flower_entry; return NULL; @@ -121,13 +125,11 @@ nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats) flower_cookie = be64_to_cpu(stats->stats_cookie); rcu_read_lock(); - nfp_flow = nfp_flower_search_fl_table(app, flower_cookie); + nfp_flow = nfp_flower_search_fl_table(app, flower_cookie, NULL, + stats->stats_con_id); if (!nfp_flow) goto exit_rcu_unlock; - if (nfp_flow->meta.host_ctx_id != stats->stats_con_id) - goto exit_rcu_unlock; - spin_lock(&nfp_flow->lock); nfp_flow->stats.pkts += be32_to_cpu(stats->pkt_count); nfp_flow->stats.bytes += be64_to_cpu(stats->byte_count); @@ -317,7 +319,8 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len, int nfp_compile_flow_metadata(struct nfp_app *app, struct tc_cls_flower_offload *flow, - struct nfp_fl_payload *nfp_flow) + struct nfp_fl_payload *nfp_flow, + struct net_device *netdev) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_payload *check_entry; @@ -348,7 +351,8 @@ int nfp_compile_flow_metadata(struct nfp_app *app, nfp_flow->stats.bytes = 0; nfp_flow->stats.used = jiffies; - check_entry = nfp_flower_search_fl_table(app, flow->cookie); + check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev, + NFP_FL_STATS_CTX_DONT_CARE); if (check_entry) { if (nfp_release_stats_entry(app, stats_cxt)) return -EINVAL; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 114d2ab02a38..70ec9d821b91 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -345,7 +345,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, } static struct nfp_fl_payload * -nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) +nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer, bool egress) { struct nfp_fl_payload *flow_pay; @@ -371,6 +371,8 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) flow_pay->meta.flags = 0; spin_lock_init(&flow_pay->lock); + flow_pay->ingress_offload = !egress; + return flow_pay; err_free_mask: @@ -402,8 +404,20 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_priv *priv = app->priv; struct nfp_fl_payload *flow_pay; struct nfp_fl_key_ls *key_layer; + struct net_device *ingr_dev; 
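Since the same TC cookie can now legitimately appear twice in the table (once per ingress/egress callback), the lookup above is keyed, with wildcards. A sketch of the match rule it implements (the helper itself is hypothetical; the field names follow the structures above):

	/* An entry matches when the cookie agrees and every *specified*
	 * key agrees: a NULL netdev matches any ingress device, and a
	 * host context of NFP_FL_STATS_CTX_DONT_CARE matches any
	 * firmware stats context.
	 */
	static bool flow_entry_matches(const struct nfp_fl_payload *e,
				       unsigned long cookie,
				       const struct net_device *netdev,
				       __be32 host_ctx)
	{
		if (e->tc_flower_cookie != cookie)
			return false;
		if (netdev && e->ingress_dev != netdev)
			return false;
		if (host_ctx != NFP_FL_STATS_CTX_DONT_CARE &&
		    e->meta.host_ctx_id != host_ctx)
			return false;
		return true;
	}

The stats path uses the second wildcard in reverse: it passes the firmware's stats context with a NULL netdev, which replaces the separate host_ctx_id comparison the old code performed after the lookup.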
int err; + ingr_dev = egress ? NULL : netdev; + flow_pay = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev, + NFP_FL_STATS_CTX_DONT_CARE); + if (flow_pay) { + /* Ignore as duplicate if it has been added by a different cb. */ + if (flow_pay->ingress_offload && egress) + return 0; + else + return -EOPNOTSUPP; + } + key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL); if (!key_layer) return -ENOMEM; @@ -413,12 +427,14 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, if (err) goto err_free_key_ls; - flow_pay = nfp_flower_allocate_new(key_layer); + flow_pay = nfp_flower_allocate_new(key_layer, egress); if (!flow_pay) { err = -ENOMEM; goto err_free_key_ls; } + flow_pay->ingress_dev = egress ? NULL : netdev; + err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay, tun_type); if (err) goto err_destroy_flow; @@ -428,7 +444,8 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, if (err) goto err_destroy_flow; - err = nfp_compile_flow_metadata(app, flow, flow_pay); + err = nfp_compile_flow_metadata(app, flow, flow_pay, + flow_pay->ingress_dev); if (err) goto err_destroy_flow; @@ -462,6 +479,7 @@ err_free_key_ls: * @app: Pointer to the APP handle * @netdev: netdev structure. * @flow: TC flower classifier offload structure + * @egress: Netdev is the egress dev. * * Removes a flow from the repeated hash structure and clears the * action payload. @@ -470,15 +488,18 @@ err_free_key_ls: */ static int nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, - struct tc_cls_flower_offload *flow) + struct tc_cls_flower_offload *flow, bool egress) { struct nfp_port *port = nfp_port_from_netdev(netdev); struct nfp_fl_payload *nfp_flow; + struct net_device *ingr_dev; int err; - nfp_flow = nfp_flower_search_fl_table(app, flow->cookie); + ingr_dev = egress ? NULL : netdev; + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev, + NFP_FL_STATS_CTX_DONT_CARE); if (!nfp_flow) - return -ENOENT; + return egress ? 0 : -ENOENT; err = nfp_modify_flow_metadata(app, nfp_flow); if (err) @@ -505,7 +526,9 @@ err_free_flow: /** * nfp_flower_get_stats() - Populates flow stats obtained from hardware. * @app: Pointer to the APP handle + * @netdev: Netdev structure. * @flow: TC flower classifier offload structure + * @egress: Netdev is the egress dev. * * Populates a flow statistics structure which corresponds to a specific flow. * * Return: negative value on error, 0 if stats populated successfully. */ static int -nfp_flower_get_stats(struct nfp_app *app, struct tc_cls_flower_offload *flow) +nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_flower_offload *flow, bool egress) { struct nfp_fl_payload *nfp_flow; + struct net_device *ingr_dev; - nfp_flow = nfp_flower_search_fl_table(app, flow->cookie); + ingr_dev = egress ?
NULL : netdev; + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev, + NFP_FL_STATS_CTX_DONT_CARE); if (!nfp_flow) return -EINVAL; + if (nfp_flow->ingress_offload && egress) + return 0; + spin_lock_bh(&nfp_flow->lock); tcf_exts_stats_update(flow->exts, nfp_flow->stats.bytes, nfp_flow->stats.pkts, nfp_flow->stats.used); @@ -543,9 +573,9 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, case TC_CLSFLOWER_REPLACE: return nfp_flower_add_offload(app, netdev, flower, egress); case TC_CLSFLOWER_DESTROY: - return nfp_flower_del_offload(app, netdev, flower); + return nfp_flower_del_offload(app, netdev, flower, egress); case TC_CLSFLOWER_STATS: - return nfp_flower_get_stats(app, flower); + return nfp_flower_get_stats(app, netdev, flower, egress); } return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index c4b1f344b4da..0ade122805ad 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -486,6 +486,10 @@ static int nfp_pci_probe(struct pci_dev *pdev, goto err_disable_msix; } + err = nfp_resource_table_init(pf->cpp); + if (err) + goto err_cpp_free; + pf->hwinfo = nfp_hwinfo_read(pf->cpp); dev_info(&pdev->dev, "Assembly: %s%s%s-%s CPLD: %s\n", @@ -548,6 +552,7 @@ err_fw_unload: vfree(pf->dumpspec); err_hwinfo_free: kfree(pf->hwinfo); +err_cpp_free: nfp_cpp_free(pf->cpp); err_disable_msix: destroy_workqueue(pf->wq); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 1eb6549f2a54..d9111c077699 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) act = bpf_prog_run_xdp(xdp_prog, &xdp); - pkt_len -= xdp.data - orig_data; + pkt_len = xdp.data_end - xdp.data; pkt_off += xdp.data - orig_data; switch (act) { diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h index ced62d112aa2..f44d0a857314 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h @@ -94,6 +94,8 @@ int nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask, /* MAC Statistics Accumulator */ #define NFP_RESOURCE_MAC_STATISTICS "mac.stat" +int nfp_resource_table_init(struct nfp_cpp *cpp); + struct nfp_resource * nfp_resource_acquire(struct nfp_cpp *cpp, const char *name); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index cd678323bacb..a0e336bd1d85 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -1330,6 +1330,7 @@ struct nfp_cpp *nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev) /* Finished with card initialization. 
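The one-line nfp_net_common.c change above deserves a note: the old code only subtracted the head adjustment, so an XDP program that also moved the packet end would leave pkt_len stale. A minimal sketch of the corrected bookkeeping (the helper name is hypothetical; struct xdp_buff and bpf_prog_run_xdp() are the real kernel interfaces):

	#include <linux/filter.h>

	static u32 run_xdp_and_refit(struct bpf_prog *prog,
				     struct xdp_buff *xdp,
				     unsigned int *pkt_off,
				     unsigned int *pkt_len)
	{
		void *orig_data = xdp->data;
		u32 act = bpf_prog_run_xdp(prog, xdp);

		/* The program may move xdp->data (head adjustment) and
		 * xdp->data_end (tail adjustment); only the pointer pair
		 * reflects the final frame, so recompute the length
		 * instead of patching the old value.
		 */
		*pkt_off += xdp->data - orig_data;
		*pkt_len = xdp->data_end - xdp->data;
		return act;
	}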
*/ dev_info(&pdev->dev, "Netronome Flow Processor NFP4000/NFP6000 PCIe Card Probe\n"); + pcie_print_link_status(pdev); nfp = kzalloc(sizeof(*nfp), GFP_KERNEL); if (!nfp) { diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h index c8f2c064cce3..4e19add1c539 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h @@ -295,6 +295,8 @@ void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex); int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex); int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex); int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex); +int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target, + unsigned long long address); /** * nfp_cppcore_pcie_unit() - Get PCI Unit of a CPP handle diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c index cb28ac03e4ca..c88bf673cb76 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c @@ -59,6 +59,11 @@ static u32 nfp_mutex_unlocked(u16 interface) return (u32)interface << 16 | 0x0000; } +static u32 nfp_mutex_owner(u32 val) +{ + return val >> 16; +} + static bool nfp_mutex_is_locked(u32 val) { return (val & 0xffff) == 0x000f; @@ -351,3 +356,43 @@ int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex) return nfp_mutex_is_locked(tmp) ? -EBUSY : -EINVAL; } + +/** + * nfp_cpp_mutex_reclaim() - Unlock mutex if held by local endpoint + * @cpp: NFP CPP handle + * @target: NFP CPP target ID (ie NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU) + * @address: Offset into the address space of the NFP CPP target ID + * + * Release lock if held by local system. Extreme care is advised, call only + * when no local lock users can exist. + * + * Return: 0 if the lock was OK, 1 if locked by us, -errno on invalid mutex + */ +int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target, + unsigned long long address) +{ + const u32 mur = NFP_CPP_ID(target, 3, 0); /* atomic_read */ + const u32 muw = NFP_CPP_ID(target, 4, 0); /* atomic_write */ + u16 interface = nfp_cpp_interface(cpp); + int err; + u32 tmp; + + err = nfp_cpp_mutex_validate(interface, &target, address); + if (err) + return err; + + /* Check lock */ + err = nfp_cpp_readl(cpp, mur, address, &tmp); + if (err < 0) + return err; + + if (nfp_mutex_is_unlocked(tmp) || nfp_mutex_owner(tmp) != interface) + return 0; + + /* Bust the lock */ + err = nfp_cpp_writel(cpp, muw, address, nfp_mutex_unlocked(interface)); + if (err < 0) + return err; + + return 1; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c index 7e14725055c7..2dd89dba9311 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c @@ -338,3 +338,62 @@ u64 nfp_resource_size(struct nfp_resource *res) { return res->size; } + +/** + * nfp_resource_table_init() - Run initial checks on the resource table + * @cpp: NFP CPP handle + * + * Start-of-day init procedure for resource table. Must be called before + * any local resource table users may exist. 
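+ *
+ * (For reference: each mutex is a CPP word whose upper 16 bits hold
+ * the owning interface ID and whose lower 16 bits read 0x000f when
+ * locked and 0x0000 when free - see nfp_mutex_owner() and
+ * nfp_mutex_is_locked() above - so "reclaiming" simply rewrites the
+ * word to the unlocked pattern when the recorded owner is the local
+ * interface.)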
+ * + * Return: 0 on success, -errno on failure + */ +int nfp_resource_table_init(struct nfp_cpp *cpp) +{ + struct nfp_cpp_mutex *dev_mutex; + int i, err; + + err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET, + NFP_RESOURCE_TBL_BASE); + if (err < 0) { + nfp_err(cpp, "Error: failed to reclaim resource table mutex\n"); + return err; + } + if (err) + nfp_warn(cpp, "Warning: busted main resource table mutex\n"); + + dev_mutex = nfp_cpp_mutex_alloc(cpp, NFP_RESOURCE_TBL_TARGET, + NFP_RESOURCE_TBL_BASE, + NFP_RESOURCE_TBL_KEY); + if (!dev_mutex) + return -ENOMEM; + + if (nfp_cpp_mutex_lock(dev_mutex)) { + nfp_err(cpp, "Error: failed to claim resource table mutex\n"); + nfp_cpp_mutex_free(dev_mutex); + return -EINVAL; + } + + /* Resource 0 is the dev_mutex, start from 1 */ + for (i = 1; i < NFP_RESOURCE_TBL_ENTRIES; i++) { + u64 addr = NFP_RESOURCE_TBL_BASE + + sizeof(struct nfp_resource_entry) * i; + + err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET, addr); + if (err < 0) { + nfp_err(cpp, + "Error: failed to reclaim resource %d mutex\n", + i); + goto err_unlock; + } + if (err) + nfp_warn(cpp, "Warning: busted resource %d mutex\n", i); + } + + err = 0; +err_unlock: + nfp_cpp_mutex_unlock(dev_mutex); + nfp_cpp_mutex_free(dev_mutex); + + return err; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 4926c5532fba..39124b594a36 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -419,6 +419,7 @@ struct phy_defs { #define NUM_RSS_MEM_TYPES 5 #define NUM_BIG_RAM_TYPES 3 +#define BIG_RAM_NAME_LEN 3 #define NUM_PHY_TBUS_ADDRESSES 2048 #define PHY_DUMP_SIZE_DWORDS (NUM_PHY_TBUS_ADDRESSES / 2) @@ -3650,8 +3651,8 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, BIT(big_ram->is_256b_bit_offset[dev_data->chip_id]) ? 
256 : 128; - strscpy(type_name, big_ram->instance_name, sizeof(type_name)); - strscpy(mem_name, big_ram->instance_name, sizeof(mem_name)); + strncpy(type_name, big_ram->instance_name, BIG_RAM_NAME_LEN); + strncpy(mem_name, big_ram->instance_name, BIG_RAM_NAME_LEN); /* Dump memory header */ offset += qed_grc_dump_mem_hdr(p_hwfn, @@ -7778,6 +7779,57 @@ int qed_dbg_igu_fifo_size(struct qed_dev *cdev) return qed_dbg_feature_size(cdev, DBG_FEATURE_IGU_FIFO); } +int qed_dbg_nvm_image_length(struct qed_hwfn *p_hwfn, + enum qed_nvm_images image_id, u32 *length) +{ + struct qed_nvm_image_att image_att; + int rc; + + *length = 0; + rc = qed_mcp_get_nvm_image_att(p_hwfn, image_id, &image_att); + if (rc) + return rc; + + *length = image_att.length; + + return rc; +} + +int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes, enum qed_nvm_images image_id) +{ + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + u32 len_rounded, i; + __be32 val; + int rc; + + *num_dumped_bytes = 0; + rc = qed_dbg_nvm_image_length(p_hwfn, image_id, &len_rounded); + if (rc) + return rc; + + DP_NOTICE(p_hwfn->cdev, + "Collecting a debug feature [\"nvram image %d\"]\n", + image_id); + + len_rounded = roundup(len_rounded, sizeof(u32)); + rc = qed_mcp_get_nvm_image(p_hwfn, image_id, buffer, len_rounded); + if (rc) + return rc; + + /* QED_NVM_IMAGE_NVM_META image is not swapped like other images */ + if (image_id != QED_NVM_IMAGE_NVM_META) + for (i = 0; i < len_rounded; i += 4) { + val = cpu_to_be32(*(u32 *)(buffer + i)); + *(u32 *)(buffer + i) = val; + } + + *num_dumped_bytes = len_rounded; + + return rc; +} + int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) { @@ -7831,6 +7883,9 @@ enum debug_print_features { IGU_FIFO = 6, PHY = 7, FW_ASSERTS = 8, + NVM_CFG1 = 9, + DEFAULT_CFG = 10, + NVM_META = 11, }; static u32 qed_calc_regdump_header(enum debug_print_features feature, @@ -7965,13 +8020,61 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) DP_ERR(cdev, "qed_dbg_mcp_trace failed. 
rc = %d\n", rc); } + /* nvm cfg1 */ + rc = qed_dbg_nvm_image(cdev, + (u8 *)buffer + offset + REGDUMP_HEADER_SIZE, + &feature_size, QED_NVM_IMAGE_NVM_CFG1); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(NVM_CFG1, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else if (rc != -ENOENT) { + DP_ERR(cdev, + "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n", + QED_NVM_IMAGE_NVM_CFG1, "QED_NVM_IMAGE_NVM_CFG1", rc); + } + + /* nvm default */ + rc = qed_dbg_nvm_image(cdev, + (u8 *)buffer + offset + REGDUMP_HEADER_SIZE, + &feature_size, QED_NVM_IMAGE_DEFAULT_CFG); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(DEFAULT_CFG, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else if (rc != -ENOENT) { + DP_ERR(cdev, + "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n", + QED_NVM_IMAGE_DEFAULT_CFG, "QED_NVM_IMAGE_DEFAULT_CFG", + rc); + } + + /* nvm meta */ + rc = qed_dbg_nvm_image(cdev, + (u8 *)buffer + offset + REGDUMP_HEADER_SIZE, + &feature_size, QED_NVM_IMAGE_NVM_META); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(NVM_META, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else if (rc != -ENOENT) { + DP_ERR(cdev, + "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n", + QED_NVM_IMAGE_NVM_META, "QED_NVM_IMAGE_NVM_META", rc); + } + return 0; } int qed_dbg_all_data_size(struct qed_dev *cdev) { + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + u32 regs_len = 0, image_len = 0; u8 cur_engine, org_engine; - u32 regs_len = 0; org_engine = qed_get_debug_engine(cdev); for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { @@ -7993,6 +8096,15 @@ int qed_dbg_all_data_size(struct qed_dev *cdev) /* Engine common */ regs_len += REGDUMP_HEADER_SIZE + qed_dbg_mcp_trace_size(cdev); + qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_NVM_CFG1, &image_len); + if (image_len) + regs_len += REGDUMP_HEADER_SIZE + image_len; + qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_DEFAULT_CFG, &image_len); + if (image_len) + regs_len += REGDUMP_HEADER_SIZE + image_len; + qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_NVM_META, &image_len); + if (image_len) + regs_len += REGDUMP_HEADER_SIZE + image_len; return regs_len; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e874504e8b28..8b1b7e8ca56c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2850,6 +2850,24 @@ static int qed_fp_cqe_completion(struct qed_dev *dev, cqe); } +static int qed_req_bulletin_update_mac(struct qed_dev *cdev, u8 *mac) +{ + int i, ret; + + if (IS_PF(cdev)) + return 0; + + for_each_hwfn(cdev, i) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + ret = qed_vf_pf_bulletin_update_mac(p_hwfn, mac); + if (ret) + return ret; + } + + return 0; +} + #ifdef CONFIG_QED_SRIOV extern const struct qed_iov_hv_ops qed_iov_ops_pass; #endif @@ -2887,6 +2905,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = { .ntuple_filter_config = &qed_ntuple_arfs_filter_config, .configure_arfs_searcher = &qed_configure_arfs_searcher, .get_coalesce = &qed_get_coalesce, + .req_bulletin_update_mac = &qed_req_bulletin_update_mac, }; const struct qed_eth_ops *qed_get_eth_ops(void) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 
9854aa9139af..d1d3787affe8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1894,15 +1894,8 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type, u8 *buf, u16 len) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); - struct qed_ptt *ptt = qed_ptt_acquire(hwfn); - int rc; - - if (!ptt) - return -EAGAIN; - rc = qed_mcp_get_nvm_image(hwfn, ptt, type, buf, len); - qed_ptt_release(hwfn, ptt); - return rc; + return qed_mcp_get_nvm_image(hwfn, type, buf, len); } static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index ec0d425766a7..0550f0ee11b3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -2529,9 +2529,8 @@ err0: return rc; } -static int +int qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, enum qed_nvm_images image_id, struct qed_nvm_image_att *p_image_att) { @@ -2546,6 +2545,15 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, case QED_NVM_IMAGE_FCOE_CFG: type = NVM_TYPE_FCOE_CFG; break; + case QED_NVM_IMAGE_NVM_CFG1: + type = NVM_TYPE_NVM_CFG1; + break; + case QED_NVM_IMAGE_DEFAULT_CFG: + type = NVM_TYPE_DEFAULT_CFG; + break; + case QED_NVM_IMAGE_NVM_META: + type = NVM_TYPE_META; + break; default: DP_NOTICE(p_hwfn, "Unknown request of image_id %08x\n", image_id); @@ -2569,7 +2577,6 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, } int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, enum qed_nvm_images image_id, u8 *p_buffer, u32 buffer_len) { @@ -2578,7 +2585,7 @@ int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn, memset(p_buffer, 0, buffer_len); - rc = qed_mcp_get_nvm_image_att(p_hwfn, p_ptt, image_id, &image_att); + rc = qed_mcp_get_nvm_image_att(p_hwfn, image_id, &image_att); if (rc) return rc; @@ -2590,9 +2597,6 @@ int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn, return -EINVAL; } - /* Each NVM image is suffixed by CRC; Upper-layer has no need for it */ - image_att.length -= 4; - if (image_att.length > buffer_len) { DP_VERBOSE(p_hwfn, QED_MSG_STORAGE, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 8a5c988d0c3c..3af3896420b9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -486,7 +486,20 @@ struct qed_nvm_image_att { * @brief Allows reading a whole nvram image * * @param p_hwfn - * @param p_ptt + * @param image_id - image to get attributes for + * @param p_image_att - image attributes structure into which to fill data + * + * @return int - 0 - operation was successful. + */ +int +qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, + enum qed_nvm_images image_id, + struct qed_nvm_image_att *p_image_att); + +/** + * @brief Allows reading a whole nvram image + * + * @param p_hwfn * @param image_id - image requested for reading * @param p_buffer - allocated buffer into which to fill data * @param buffer_len - length of the allocated buffer. @@ -494,7 +507,6 @@ struct qed_nvm_image_att { * @return 0 iff p_buffer now contains the nvram image. 
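 *
 * Note that the image is now returned exactly as stored in NVRAM: the
 * trailing CRC word is no longer stripped here. Callers that need the
 * length up front should use qed_mcp_get_nvm_image_att(); the debug
 * dump path above rounds that length up to a multiple of 4 and
 * byte-swaps every image except QED_NVM_IMAGE_NVM_META.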
*/ int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, enum qed_nvm_images image_id, u8 *p_buffer, u32 buffer_len); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 5acb91b3564c..f01bf52bc381 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -48,7 +48,7 @@ static int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn, u8 opcode, __le16 echo, union event_ring_data *data, u8 fw_return_code); - +static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid); static u8 qed_vf_calculate_legacy(struct qed_vf_info *p_vf) { @@ -1790,7 +1790,8 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, if (!p_vf->vport_instance) return -EINVAL; - if (events & BIT(MAC_ADDR_FORCED)) { + if ((events & BIT(MAC_ADDR_FORCED)) || + p_vf->p_vf_info.is_trusted_configured) { /* Since there's no way [currently] of removing the MAC, * we can always assume this means we need to force it. */ @@ -1809,8 +1810,12 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, "PF failed to configure MAC for VF\n"); return rc; } - - p_vf->configured_features |= 1 << MAC_ADDR_FORCED; + if (p_vf->p_vf_info.is_trusted_configured) + p_vf->configured_features |= + BIT(VFPF_BULLETIN_MAC_ADDR); + else + p_vf->configured_features |= + BIT(MAC_ADDR_FORCED); } if (events & BIT(VLAN_ADDR_FORCED)) { @@ -3170,6 +3175,10 @@ static int qed_iov_vf_update_mac_shadow(struct qed_hwfn *p_hwfn, if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)) return 0; + /* Don't keep track of shadow copy since we don't intend to restore. */ + if (p_vf->p_vf_info.is_trusted_configured) + return 0; + /* First remove entries and then add new ones */ if (p_params->opcode == QED_FILTER_REMOVE) { for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) { @@ -3244,9 +3253,17 @@ static int qed_iov_chk_ucast(struct qed_hwfn *hwfn, /* No real decision to make; Store the configured MAC */ if (params->type == QED_FILTER_MAC || - params->type == QED_FILTER_MAC_VLAN) + params->type == QED_FILTER_MAC_VLAN) { ether_addr_copy(vf->mac, params->mac); + if (vf->is_trusted_configured) { + qed_iov_bulletin_set_mac(hwfn, vf->mac, vfid); + + /* Update and post bulletin again */ + qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG); + } + } + return 0; } @@ -3803,6 +3820,40 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn, __qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin); } +static int +qed_iov_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_vf_info *p_vf) +{ + struct qed_bulletin_content *p_bulletin = p_vf->bulletin.p_virt; + struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx; + struct vfpf_bulletin_update_mac_tlv *p_req; + u8 status = PFVF_STATUS_SUCCESS; + int rc = 0; + + if (!p_vf->p_vf_info.is_trusted_configured) { + DP_VERBOSE(p_hwfn, + QED_MSG_IOV, + "Blocking bulletin update request from untrusted VF[%d]\n", + p_vf->abs_vf_id); + status = PFVF_STATUS_NOT_SUPPORTED; + rc = -EINVAL; + goto send_status; + } + + p_req = &mbx->req_virt->bulletin_update_mac; + ether_addr_copy(p_bulletin->mac, p_req->mac); + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "Updated bulletin of VF[%d] with requested MAC[%pM]\n", + p_vf->abs_vf_id, p_req->mac); + +send_status: + qed_iov_prepare_resp(p_hwfn, p_ptt, p_vf, + CHANNEL_TLV_BULLETIN_UPDATE_MAC, + sizeof(struct pfvf_def_resp_tlv), status); + return rc; +} + static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, struct
qed_ptt *p_ptt, int vfid) { @@ -3882,6 +3933,9 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, case CHANNEL_TLV_COALESCE_READ: qed_iov_vf_pf_get_coalesce(p_hwfn, p_ptt, p_vf); break; + case CHANNEL_TLV_BULLETIN_UPDATE_MAC: + qed_iov_vf_pf_bulletin_update_mac(p_hwfn, p_ptt, p_vf); + break; } } else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) { DP_VERBOSE(p_hwfn, QED_MSG_IOV, @@ -4081,16 +4135,60 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn, return; } - feature = 1 << MAC_ADDR_FORCED; + if (vf_info->p_vf_info.is_trusted_configured) { + feature = BIT(VFPF_BULLETIN_MAC_ADDR); + /* Trust mode will disable Forced MAC */ + vf_info->bulletin.p_virt->valid_bitmap &= + ~BIT(MAC_ADDR_FORCED); + } else { + feature = BIT(MAC_ADDR_FORCED); + /* Forced MAC will disable MAC_ADDR */ + vf_info->bulletin.p_virt->valid_bitmap &= + ~BIT(VFPF_BULLETIN_MAC_ADDR); + } + memcpy(vf_info->bulletin.p_virt->mac, mac, ETH_ALEN); vf_info->bulletin.p_virt->valid_bitmap |= feature; - /* Forced MAC will disable MAC_ADDR */ - vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR); qed_iov_configure_vport_forced(p_hwfn, vf_info, feature); } +static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid) +{ + struct qed_vf_info *vf_info; + u64 feature; + + vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true); + if (!vf_info) { + DP_NOTICE(p_hwfn->cdev, "Can not set MAC, invalid vfid [%d]\n", + vfid); + return -EINVAL; + } + + if (vf_info->b_malicious) { + DP_NOTICE(p_hwfn->cdev, "Can't set MAC to malicious VF [%d]\n", + vfid); + return -EINVAL; + } + + if (vf_info->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "Can not set MAC, Forced MAC is configured\n"); + return -EINVAL; + } + + feature = BIT(VFPF_BULLETIN_MAC_ADDR); + ether_addr_copy(vf_info->bulletin.p_virt->mac, mac); + + vf_info->bulletin.p_virt->valid_bitmap |= feature; + + if (vf_info->p_vf_info.is_trusted_configured) + qed_iov_configure_vport_forced(p_hwfn, vf_info, feature); + + return 0; +} + static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn, u16 pvid, int vfid) { @@ -4204,6 +4302,21 @@ out: return rc; } +static u8 *qed_iov_bulletin_get_mac(struct qed_hwfn *p_hwfn, u16 rel_vf_id) +{ + struct qed_vf_info *p_vf; + + p_vf = qed_iov_get_vf_info(p_hwfn, rel_vf_id, true); + if (!p_vf || !p_vf->bulletin.p_virt) + return NULL; + + if (!(p_vf->bulletin.p_virt->valid_bitmap & + BIT(VFPF_BULLETIN_MAC_ADDR))) + return NULL; + + return p_vf->bulletin.p_virt->mac; +} + static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn, u16 rel_vf_id) { @@ -4493,8 +4606,12 @@ static int qed_sriov_pf_set_mac(struct qed_dev *cdev, u8 *mac, int vfid) if (!vf_info) continue; - /* Set the forced MAC, and schedule the IOV task */ - ether_addr_copy(vf_info->forced_mac, mac); + /* Set the MAC, and schedule the IOV task */ + if (vf_info->is_trusted_configured) + ether_addr_copy(vf_info->mac, mac); + else + ether_addr_copy(vf_info->forced_mac, mac); + qed_schedule_iov(hwfn, QED_IOV_WQ_SET_UNICAST_FILTER_FLAG); } @@ -4802,6 +4919,33 @@ static void qed_handle_vf_msg(struct qed_hwfn *hwfn) qed_ptt_release(hwfn, ptt); } +static bool qed_pf_validate_req_vf_mac(struct qed_hwfn *hwfn, + u8 *mac, + struct qed_public_vf_info *info) +{ + if (info->is_trusted_configured) { + if (is_valid_ether_addr(info->mac) && + (!mac || !ether_addr_equal(mac, info->mac))) + return true; + } else { + if (is_valid_ether_addr(info->forced_mac) && + (!mac || 
!ether_addr_equal(mac, info->forced_mac))) + return true; + } + + return false; +} + +static void qed_set_bulletin_mac(struct qed_hwfn *hwfn, + struct qed_public_vf_info *info, + int vfid) +{ + if (info->is_trusted_configured) + qed_iov_bulletin_set_mac(hwfn, info->mac, vfid); + else + qed_iov_bulletin_set_forced_mac(hwfn, info->forced_mac, vfid); +} + static void qed_handle_pf_set_vf_unicast(struct qed_hwfn *hwfn) { int i; @@ -4816,18 +4960,20 @@ static void qed_handle_pf_set_vf_unicast(struct qed_hwfn *hwfn) continue; /* Update data on bulletin board */ - mac = qed_iov_bulletin_get_forced_mac(hwfn, i); - if (is_valid_ether_addr(info->forced_mac) && - (!mac || !ether_addr_equal(mac, info->forced_mac))) { + if (info->is_trusted_configured) + mac = qed_iov_bulletin_get_mac(hwfn, i); + else + mac = qed_iov_bulletin_get_forced_mac(hwfn, i); + + if (qed_pf_validate_req_vf_mac(hwfn, mac, info)) { DP_VERBOSE(hwfn, QED_MSG_IOV, "Handling PF setting of VF MAC to VF 0x%02x [Abs 0x%02x]\n", i, hwfn->cdev->p_iov_info->first_vf_in_pf + i); - /* Update bulletin board with forced MAC */ - qed_iov_bulletin_set_forced_mac(hwfn, - info->forced_mac, i); + /* Update bulletin board with MAC */ + qed_set_bulletin_mac(hwfn, info, i); update = true; } @@ -4867,6 +5013,72 @@ static void qed_handle_bulletin_post(struct qed_hwfn *hwfn) qed_ptt_release(hwfn, ptt); } +static void qed_update_mac_for_vf_trust_change(struct qed_hwfn *hwfn, int vf_id) +{ + struct qed_public_vf_info *vf_info; + struct qed_vf_info *vf; + u8 *force_mac; + int i; + + vf_info = qed_iov_get_public_vf_info(hwfn, vf_id, true); + vf = qed_iov_get_vf_info(hwfn, vf_id, true); + + if (!vf_info || !vf) + return; + + /* Force MAC converted to generic MAC in case of VF trust on */ + if (vf_info->is_trusted_configured && + (vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))) { + force_mac = qed_iov_bulletin_get_forced_mac(hwfn, vf_id); + + if (force_mac) { + /* Clear existing shadow copy of MAC to have a clean + * slate. + */ + for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) { + if (ether_addr_equal(vf->shadow_config.macs[i], + vf_info->mac)) { + memset(vf->shadow_config.macs[i], 0, + ETH_ALEN); + DP_VERBOSE(hwfn, QED_MSG_IOV, + "Shadow MAC %pM removed for VF 0x%02x, VF trust mode is ON\n", + vf_info->mac, vf_id); + break; + } + } + + ether_addr_copy(vf_info->mac, force_mac); + memset(vf_info->forced_mac, 0, ETH_ALEN); + vf->bulletin.p_virt->valid_bitmap &= + ~BIT(MAC_ADDR_FORCED); + qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG); + } + } + + /* Update shadow copy with VF MAC when trust mode is turned off */ + if (!vf_info->is_trusted_configured) { + u8 empty_mac[ETH_ALEN]; + + memset(empty_mac, 0, ETH_ALEN); + for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) { + if (ether_addr_equal(vf->shadow_config.macs[i], + empty_mac)) { + ether_addr_copy(vf->shadow_config.macs[i], + vf_info->mac); + DP_VERBOSE(hwfn, QED_MSG_IOV, + "Shadow is updated with %pM for VF 0x%02x, VF trust mode is OFF\n", + vf_info->mac, vf_id); + break; + } + } + /* Clear bulletin when trust mode is turned off, + * to have a clean slate for next (normal) operations. 
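+ *
+ * In short: an untrusted VF runs with MAC_ADDR_FORCED - the PF-chosen
+ * MAC is enforced and the VF's own filter requests are shadowed for
+ * later restore - while a trusted VF runs with VFPF_BULLETIN_MAC_ADDR,
+ * may change its MAC itself (qed_iov_chk_ucast() re-posts the
+ * bulletin), and no shadow copy is kept.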
+ */ + qed_iov_bulletin_set_mac(hwfn, empty_mac, vf_id); + qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG); + } +} + static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) { struct qed_sp_vport_update_params params; @@ -4890,6 +5102,9 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) continue; vf_info->is_trusted_configured = vf_info->is_trusted_request; + /* Handle forced MAC mode */ + qed_update_mac_for_vf_trust_change(hwfn, i); + /* Validate that the VF has a configured vport */ vf = qed_iov_get_vf_info(hwfn, i, true); if (!vf->vport_instance) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 91b5e9f02a62..2d7fcd6a0777 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1375,6 +1375,35 @@ exit: } int +qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, + u8 *p_mac) +{ + struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info; + struct vfpf_bulletin_update_mac_tlv *p_req; + struct pfvf_def_resp_tlv *p_resp; + int rc; + + if (!p_mac) + return -EINVAL; + + /* clear mailbox and prep header tlv */ + p_req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_BULLETIN_UPDATE_MAC, + sizeof(*p_req)); + ether_addr_copy(p_req->mac, p_mac); + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "Requesting bulletin update for MAC[%pM]\n", p_mac); + + /* add list termination tlv */ + qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); + + p_resp = &p_iov->pf2vf_reply->default_resp; + rc = qed_send_msg2pf(p_hwfn, &p_resp->hdr.status, sizeof(*p_resp)); + qed_vf_pf_req_end(p_hwfn, rc); + return rc; +} + +int qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn, u16 rx_coal, u16 tx_coal, struct qed_queue_cid *p_cid) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index 97d44dfb38ca..4f05d5eb3cf5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -518,6 +518,12 @@ struct pfvf_read_coal_resp_tlv { u8 padding[6]; }; +struct vfpf_bulletin_update_mac_tlv { + struct vfpf_first_tlv first_tlv; + u8 mac[ETH_ALEN]; + u8 padding[2]; +}; + union vfpf_tlvs { struct vfpf_first_tlv first_tlv; struct vfpf_acquire_tlv acquire; @@ -532,6 +538,7 @@ union vfpf_tlvs { struct vfpf_update_tunn_param_tlv tunn_param_update; struct vfpf_update_coalesce update_coalesce; struct vfpf_read_coal_req_tlv read_coal_req; + struct vfpf_bulletin_update_mac_tlv bulletin_update_mac; struct tlv_buffer_size tlv_buf_size; }; @@ -650,6 +657,7 @@ enum { CHANNEL_TLV_COALESCE_UPDATE, CHANNEL_TLV_QID, CHANNEL_TLV_COALESCE_READ, + CHANNEL_TLV_BULLETIN_UPDATE_MAC, CHANNEL_TLV_MAX, /* Required for iterating over vport-update tlvs. 
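For readers new to the qed VF/PF channel, the VF-side function above follows the driver's standard four-step mailbox protocol. Here is the same sequence again, re-annotated to make the steps explicit (same calls as above; only the comments are editorial):

	static int vf_pf_bulletin_update_mac_annotated(struct qed_hwfn *p_hwfn,
						       const u8 *mac)
	{
		struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
		struct vfpf_bulletin_update_mac_tlv *p_req;
		struct pfvf_def_resp_tlv *p_resp;
		int rc;

		/* 1. Clear the mailbox and write the header TLV, sized
		 *    for the whole request structure.
		 */
		p_req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_BULLETIN_UPDATE_MAC,
				       sizeof(*p_req));
		ether_addr_copy(p_req->mac, mac);

		/* 2. Terminate the TLV list. */
		qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END,
			    sizeof(struct channel_list_end_tlv));

		/* 3. Send synchronously and wait for the PF's default
		 *    response.
		 */
		p_resp = &p_iov->pf2vf_reply->default_resp;
		rc = qed_send_msg2pf(p_hwfn, &p_resp->hdr.status,
				     sizeof(*p_resp));

		/* 4. Always release the mailbox, success or not. */
		qed_vf_pf_req_end(p_hwfn, rc);
		return rc;
	}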
@@ -1042,6 +1050,13 @@ int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn, struct qed_tunnel_info *p_tunn); u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id); +/** + * @brief - Ask PF to update the MAC address in its bulletin board + * + * @param p_mac - mac address to be updated in the bulletin board + */ +int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, u8 *p_mac); + #else static inline void qed_vf_get_link_params(struct qed_hwfn *p_hwfn, struct qed_mcp_link_params *params) @@ -1228,6 +1243,12 @@ static inline int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn, return -EINVAL; } +static inline int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, + u8 *p_mac) +{ + return -EINVAL; +} + static inline u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 6687e04d1558..43569b1839be 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -550,8 +550,7 @@ void qede_force_mac(void *dev, u8 *mac, bool forced) __qede_lock(edev); - /* MAC hints take effect only if we haven't set one already */ - if (is_valid_ether_addr(edev->ndev->dev_addr) && !forced) { + if (!is_valid_ether_addr(mac)) { __qede_unlock(edev); return; } @@ -1161,6 +1160,10 @@ int qede_set_mac_addr(struct net_device *ndev, void *p) if (edev->state != QEDE_STATE_OPEN) { DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "The device is currently down\n"); + /* Ask PF to explicitly update a copy in the bulletin board */ + if (IS_VF(edev) && edev->ops->req_bulletin_update_mac) + edev->ops->req_bulletin_update_mac(edev->cdev, + ndev->dev_addr); goto out; } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 604ae78381ae..6d99b141a7aa 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -84,12 +84,11 @@ The RTL chips use a 64 element hash table based on the Ethernet CRC. */ static const int multicast_filter_limit = 32; -#define MAX_READ_REQUEST_SHIFT 12 #define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */ #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */ #define R8169_REGS_SIZE 256 -#define R8169_NAPI_WEIGHT 64 +#define R8169_RX_BUF_SIZE (SZ_16K - 1) #define NUM_TX_DESC 64 /* Number of Tx descriptor registers */ #define NUM_RX_DESC 256U /* Number of Rx descriptor registers */ #define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) @@ -172,12 +171,11 @@ enum rtl_tx_desc_version { #define JUMBO_7K (7*1024 - ETH_HLEN - 2) #define JUMBO_9K (9*1024 - ETH_HLEN - 2) -#define _R(NAME,TD,FW,SZ,B) { \ +#define _R(NAME,TD,FW,SZ) { \ .name = NAME, \ .txd_version = TD, \ .fw_name = FW, \ .jumbo_max = SZ, \ - .jumbo_tx_csum = B \ } static const struct { @@ -185,135 +183,111 @@ static const struct { enum rtl_tx_desc_version txd_version; const char *fw_name; u16 jumbo_max; - bool jumbo_tx_csum; } rtl_chip_infos[] = { /* PCI devices.
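The qede hunk above closes a subtle gap: when a VF's MAC is changed while the interface is down, no vport is reconfigured, so nothing would otherwise tell the PF and the address could be lost on the next bulletin refresh. A condensed sketch of the resulting ndo handler shape (illustrative only; the op is the req_bulletin_update_mac added in this series):

	static int vf_set_mac_addr(struct net_device *ndev, void *p)
	{
		struct qede_dev *edev = netdev_priv(ndev);
		struct sockaddr *addr = p;

		if (!is_valid_ether_addr(addr->sa_data))
			return -EADDRNOTAVAIL;

		ether_addr_copy(ndev->dev_addr, addr->sa_data);

		if (edev->state != QEDE_STATE_OPEN) {
			/* No vport to reprogram; push the new MAC into
			 * the PF's bulletin explicitly so it sticks.
			 */
			if (IS_VF(edev) && edev->ops->req_bulletin_update_mac)
				edev->ops->req_bulletin_update_mac(edev->cdev,
								   ndev->dev_addr);
			return 0;
		}

		/* ... device is up: reconfigure the unicast filter ... */
		return 0;
	}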
*/ [RTL_GIGA_MAC_VER_01] = - _R("RTL8169", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_02] = - _R("RTL8169s", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169s", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_03] = - _R("RTL8110s", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8110s", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_04] = - _R("RTL8169sb/8110sb", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169sb/8110sb", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_05] = - _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K), [RTL_GIGA_MAC_VER_06] = - _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K, true), + _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K), /* PCI-E devices. */ [RTL_GIGA_MAC_VER_07] = - _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K, true), + _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_08] = - _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K, true), + _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_09] = - _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K, true), + _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_10] = - _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_11] = - _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false), + _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K), [RTL_GIGA_MAC_VER_12] = - _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false), + _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K), [RTL_GIGA_MAC_VER_13] = - _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_14] = - _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_15] = - _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_16] = - _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true), + _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K), [RTL_GIGA_MAC_VER_17] = - _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false), + _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K), [RTL_GIGA_MAC_VER_18] = - _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_19] = - _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_20] = - _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_21] = - _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_22] = - _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_23] = - _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_24] = - _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false), + _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K), [RTL_GIGA_MAC_VER_25] = - _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_1, - JUMBO_9K, false), + _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_1, JUMBO_9K), [RTL_GIGA_MAC_VER_26] = - _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_2, - JUMBO_9K, false), + _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_2, JUMBO_9K), [RTL_GIGA_MAC_VER_27] = - _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K, false), + _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_28] = - _R("RTL8168dp/8111dp", RTL_TD_1, 
NULL, JUMBO_9K, false), + _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_29] = - _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, - JUMBO_1K, true), + _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, JUMBO_1K), [RTL_GIGA_MAC_VER_30] = - _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, - JUMBO_1K, true), + _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, JUMBO_1K), [RTL_GIGA_MAC_VER_31] = - _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K, false), + _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_32] = - _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_1, - JUMBO_9K, false), + _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_1, JUMBO_9K), [RTL_GIGA_MAC_VER_33] = - _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_2, - JUMBO_9K, false), + _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_2, JUMBO_9K), [RTL_GIGA_MAC_VER_34] = - _R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3, - JUMBO_9K, false), + _R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3, JUMBO_9K), [RTL_GIGA_MAC_VER_35] = - _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_1, - JUMBO_9K, false), + _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_1, JUMBO_9K), [RTL_GIGA_MAC_VER_36] = - _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_2, - JUMBO_9K, false), + _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_2, JUMBO_9K), [RTL_GIGA_MAC_VER_37] = - _R("RTL8402", RTL_TD_1, FIRMWARE_8402_1, - JUMBO_1K, true), + _R("RTL8402", RTL_TD_1, FIRMWARE_8402_1, JUMBO_1K), [RTL_GIGA_MAC_VER_38] = - _R("RTL8411", RTL_TD_1, FIRMWARE_8411_1, - JUMBO_9K, false), + _R("RTL8411", RTL_TD_1, FIRMWARE_8411_1, JUMBO_9K), [RTL_GIGA_MAC_VER_39] = - _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_1, - JUMBO_1K, true), + _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_1, JUMBO_1K), [RTL_GIGA_MAC_VER_40] = - _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_2, - JUMBO_9K, false), + _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_2, JUMBO_9K), [RTL_GIGA_MAC_VER_41] = - _R("RTL8168g/8111g", RTL_TD_1, NULL, JUMBO_9K, false), + _R("RTL8168g/8111g", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_42] = - _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_3, - JUMBO_9K, false), + _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_3, JUMBO_9K), [RTL_GIGA_MAC_VER_43] = - _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_2, - JUMBO_1K, true), + _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_2, JUMBO_1K), [RTL_GIGA_MAC_VER_44] = - _R("RTL8411", RTL_TD_1, FIRMWARE_8411_2, - JUMBO_9K, false), + _R("RTL8411", RTL_TD_1, FIRMWARE_8411_2, JUMBO_9K), [RTL_GIGA_MAC_VER_45] = - _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_1, - JUMBO_9K, false), + _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_1, JUMBO_9K), [RTL_GIGA_MAC_VER_46] = - _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_2, - JUMBO_9K, false), + _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_2, JUMBO_9K), [RTL_GIGA_MAC_VER_47] = - _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_1, - JUMBO_1K, false), + _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_1, JUMBO_1K), [RTL_GIGA_MAC_VER_48] = - _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_2, - JUMBO_1K, false), + _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_2, JUMBO_1K), [RTL_GIGA_MAC_VER_49] = - _R("RTL8168ep/8111ep", RTL_TD_1, NULL, - JUMBO_9K, false), + _R("RTL8168ep/8111ep", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_50] = - _R("RTL8168ep/8111ep", RTL_TD_1, NULL, - JUMBO_9K, false), + _R("RTL8168ep/8111ep", RTL_TD_1, NULL, JUMBO_9K), [RTL_GIGA_MAC_VER_51] = - _R("RTL8168ep/8111ep", RTL_TD_1, NULL, - JUMBO_9K, false), + _R("RTL8168ep/8111ep", RTL_TD_1, NULL, JUMBO_9K), }; #undef _R @@ -345,7 +319,6 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { 
MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
-static int rx_buf_sz = 16383;
 static int use_dac = -1;
 static struct {
 u32 msg_enable;
@@ -437,13 +410,8 @@ enum rtl8168_8101_registers {
 CSIAR = 0x68,
#define CSIAR_FLAG 0x80000000
#define CSIAR_WRITE_CMD 0x80000000
-#define CSIAR_BYTE_ENABLE 0x0f
-#define CSIAR_BYTE_ENABLE_SHIFT 12
-#define CSIAR_ADDR_MASK 0x0fff
-#define CSIAR_FUNC_CARD 0x00000000
-#define CSIAR_FUNC_SDIO 0x00010000
-#define CSIAR_FUNC_NIC 0x00020000
-#define CSIAR_FUNC_NIC2 0x00010000
+#define CSIAR_BYTE_ENABLE 0x0000f000
+#define CSIAR_ADDR_MASK 0x00000fff
 PMCH = 0x6f,
 EPHYAR = 0x80,
#define EPHYAR_FLAG 0x80000000
@@ -626,6 +594,7 @@ enum rtl_register_content {
 RxChkSum = (1 << 5),
 PCIDAC = (1 << 4),
 PCIMulRW = (1 << 3),
+#define INTT_MASK GENMASK(1, 0)
 INTT_0 = 0x0000, // 8168
 INTT_1 = 0x0001, // 8168
 INTT_2 = 0x0002, // 8168
@@ -716,6 +685,7 @@ enum rtl_rx_desc_bit {
 };
 #define RsvdMask 0x3fffc000
+#define CPCMD_QUIRK_MASK (Normal_mode | RxVlan | RxChkSum | INTT_MASK)
 struct TxDesc {
 __le32 opts1;
@@ -778,7 +748,6 @@ struct rtl8169_private {
 struct net_device *dev;
 struct napi_struct napi;
 u32 msg_enable;
- u16 txd_version;
 u16 mac_version;
 u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
 u32 cur_tx; /* Index into the Tx descriptor buffer of next Tx pkt. */
@@ -802,26 +771,16 @@ struct rtl8169_private {
 int (*read)(struct rtl8169_private *, int);
 } mdio_ops;
- struct pll_power_ops {
- void (*down)(struct rtl8169_private *);
- void (*up)(struct rtl8169_private *);
- } pll_power_ops;
-
 struct jumbo_ops {
 void (*enable)(struct rtl8169_private *);
 void (*disable)(struct rtl8169_private *);
 } jumbo_ops;
- struct csi_ops {
- void (*write)(struct rtl8169_private *, int, int);
- u32 (*read)(struct rtl8169_private *, int);
- } csi_ops;
-
 int (*set_speed)(struct net_device *, u8 aneg, u16 sp, u8 dpx, u32 adv);
 int (*get_link_ksettings)(struct net_device *,
 struct ethtool_link_ksettings *);
 void (*phy_reset_enable)(struct rtl8169_private *tp);
- void (*hw_start)(struct net_device *);
+ void (*hw_start)(struct rtl8169_private *tp);
 unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
 unsigned int (*link_ok)(struct rtl8169_private *tp);
 int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
@@ -833,14 +792,11 @@ struct rtl8169_private {
 struct work_struct work;
 } wk;
- unsigned features;
-
 struct mii_if_info mii;
 dma_addr_t counters_phys_addr;
 struct rtl8169_counters *counters;
 struct rtl8169_tc_offsets tc_offset;
 u32 saved_wolopts;
- u32 opts1_mask;
 struct rtl_fw {
 const struct firmware *fw;
@@ -1645,23 +1601,8 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 if (options & LinkUp)
 wolopts |= WAKE_PHY;
 switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_35:
- case RTL_GIGA_MAC_VER_36:
- case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_38:
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+ case RTL_GIGA_MAC_VER_40 ...
RTL_GIGA_MAC_VER_51: if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2) wolopts |= WAKE_MAGIC; break; @@ -1722,23 +1663,8 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) RTL_W8(tp, Cfg9346, Cfg9346_Unlock); switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_35: - case RTL_GIGA_MAC_VER_36: - case RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_38: - case RTL_GIGA_MAC_VER_40: - case RTL_GIGA_MAC_VER_41: - case RTL_GIGA_MAC_VER_42: - case RTL_GIGA_MAC_VER_43: - case RTL_GIGA_MAC_VER_44: - case RTL_GIGA_MAC_VER_45: - case RTL_GIGA_MAC_VER_46: - case RTL_GIGA_MAC_VER_47: - case RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_49: - case RTL_GIGA_MAC_VER_50: - case RTL_GIGA_MAC_VER_51: + case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: tmp = ARRAY_SIZE(cfg) - 1; if (wolopts & WAKE_MAGIC) rtl_w0w1_eri(tp, @@ -1960,18 +1886,20 @@ static netdev_features_t rtl8169_fix_features(struct net_device *dev, features &= ~NETIF_F_ALL_TSO; if (dev->mtu > JUMBO_1K && - !rtl_chip_infos[tp->mac_version].jumbo_tx_csum) + tp->mac_version > RTL_GIGA_MAC_VER_06) features &= ~NETIF_F_IP_CSUM; return features; } -static void __rtl8169_set_features(struct net_device *dev, - netdev_features_t features) +static int rtl8169_set_features(struct net_device *dev, + netdev_features_t features) { struct rtl8169_private *tp = netdev_priv(dev); u32 rx_config; + rtl_lock_work(tp); + rx_config = RTL_R32(tp, RxConfig); if (features & NETIF_F_RXALL) rx_config |= (AcceptErr | AcceptRunt); @@ -1990,28 +1918,14 @@ static void __rtl8169_set_features(struct net_device *dev, else tp->cp_cmd &= ~RxVlan; - tp->cp_cmd |= RTL_R16(tp, CPlusCmd) & ~(RxVlan | RxChkSum); - RTL_W16(tp, CPlusCmd, tp->cp_cmd); RTL_R16(tp, CPlusCmd); -} -static int rtl8169_set_features(struct net_device *dev, - netdev_features_t features) -{ - struct rtl8169_private *tp = netdev_priv(dev); - - features &= NETIF_F_RXALL | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX; - - rtl_lock_work(tp); - if (features ^ dev->features) - __rtl8169_set_features(dev, features); rtl_unlock_work(tp); return 0; } - static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb) { return (skb_vlan_tag_present(skb)) ? @@ -2155,9 +2069,8 @@ DECLARE_RTL_COND(rtl_counters_cond) return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump); } -static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd) +static bool rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd) { - struct rtl8169_private *tp = netdev_priv(dev); dma_addr_t paddr = tp->counters_phys_addr; u32 cmd; @@ -2170,10 +2083,8 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd) return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); } -static bool rtl8169_reset_counters(struct net_device *dev) +static bool rtl8169_reset_counters(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - /* * Versions prior to RTL_GIGA_MAC_VER_19 don't support resetting the * tally counters. 
@@ -2181,13 +2092,11 @@ static bool rtl8169_reset_counters(struct net_device *dev) if (tp->mac_version < RTL_GIGA_MAC_VER_19) return true; - return rtl8169_do_counters(dev, CounterReset); + return rtl8169_do_counters(tp, CounterReset); } -static bool rtl8169_update_counters(struct net_device *dev) +static bool rtl8169_update_counters(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - /* * Some chips are unable to dump tally counters when the receiver * is disabled. @@ -2195,12 +2104,11 @@ static bool rtl8169_update_counters(struct net_device *dev) if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0) return true; - return rtl8169_do_counters(dev, CounterDump); + return rtl8169_do_counters(tp, CounterDump); } -static bool rtl8169_init_counter_offsets(struct net_device *dev) +static bool rtl8169_init_counter_offsets(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); struct rtl8169_counters *counters = tp->counters; bool ret = false; @@ -2223,10 +2131,10 @@ static bool rtl8169_init_counter_offsets(struct net_device *dev) return true; /* If both, reset and update fail, propagate to caller. */ - if (rtl8169_reset_counters(dev)) + if (rtl8169_reset_counters(tp)) ret = true; - if (rtl8169_update_counters(dev)) + if (rtl8169_update_counters(tp)) ret = true; tp->tc_offset.tx_errors = counters->tx_errors; @@ -2249,7 +2157,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev, pm_runtime_get_noresume(d); if (pm_runtime_active(d)) - rtl8169_update_counters(dev); + rtl8169_update_counters(tp); pm_runtime_put_noidle(d); @@ -2391,7 +2299,7 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) if (IS_ERR(ci)) return PTR_ERR(ci); - scale = &ci->scalev[RTL_R16(tp, CPlusCmd) & 3]; + scale = &ci->scalev[tp->cp_cmd & INTT_MASK]; /* read IntrMitigate and adjust according to scale */ for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) { @@ -2490,7 +2398,7 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) RTL_W16(tp, IntrMitigate, swab16(w)); - tp->cp_cmd = (tp->cp_cmd & ~3) | cp01; + tp->cp_cmd = (tp->cp_cmd & ~INTT_MASK) | cp01; RTL_W16(tp, CPlusCmd, tp->cp_cmd); RTL_R16(tp, CPlusCmd); @@ -2520,7 +2428,7 @@ static const struct ethtool_ops rtl8169_ethtool_ops = { }; static void rtl8169_get_mac_version(struct rtl8169_private *tp, - struct net_device *dev, u8 default_version) + u8 default_version) { /* * The driver currently handles the 8168Bf and the 8168Be identically @@ -2560,12 +2468,10 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, /* 8168E family. */ { 0x7c800000, 0x2c800000, RTL_GIGA_MAC_VER_34 }, - { 0x7cf00000, 0x2c200000, RTL_GIGA_MAC_VER_33 }, { 0x7cf00000, 0x2c100000, RTL_GIGA_MAC_VER_32 }, { 0x7c800000, 0x2c000000, RTL_GIGA_MAC_VER_33 }, /* 8168D family. */ - { 0x7cf00000, 0x28300000, RTL_GIGA_MAC_VER_26 }, { 0x7cf00000, 0x28100000, RTL_GIGA_MAC_VER_25 }, { 0x7c800000, 0x28000000, RTL_GIGA_MAC_VER_26 }, @@ -2575,32 +2481,24 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, { 0x7cf00000, 0x28b00000, RTL_GIGA_MAC_VER_31 }, /* 8168C family. 
*/ - { 0x7cf00000, 0x3cb00000, RTL_GIGA_MAC_VER_24 }, { 0x7cf00000, 0x3c900000, RTL_GIGA_MAC_VER_23 }, { 0x7cf00000, 0x3c800000, RTL_GIGA_MAC_VER_18 }, { 0x7c800000, 0x3c800000, RTL_GIGA_MAC_VER_24 }, { 0x7cf00000, 0x3c000000, RTL_GIGA_MAC_VER_19 }, { 0x7cf00000, 0x3c200000, RTL_GIGA_MAC_VER_20 }, { 0x7cf00000, 0x3c300000, RTL_GIGA_MAC_VER_21 }, - { 0x7cf00000, 0x3c400000, RTL_GIGA_MAC_VER_22 }, { 0x7c800000, 0x3c000000, RTL_GIGA_MAC_VER_22 }, /* 8168B family. */ { 0x7cf00000, 0x38000000, RTL_GIGA_MAC_VER_12 }, - { 0x7cf00000, 0x38500000, RTL_GIGA_MAC_VER_17 }, { 0x7c800000, 0x38000000, RTL_GIGA_MAC_VER_17 }, { 0x7c800000, 0x30000000, RTL_GIGA_MAC_VER_11 }, /* 8101 family. */ - { 0x7cf00000, 0x44900000, RTL_GIGA_MAC_VER_39 }, { 0x7c800000, 0x44800000, RTL_GIGA_MAC_VER_39 }, { 0x7c800000, 0x44000000, RTL_GIGA_MAC_VER_37 }, - { 0x7cf00000, 0x40b00000, RTL_GIGA_MAC_VER_30 }, - { 0x7cf00000, 0x40a00000, RTL_GIGA_MAC_VER_30 }, { 0x7cf00000, 0x40900000, RTL_GIGA_MAC_VER_29 }, { 0x7c800000, 0x40800000, RTL_GIGA_MAC_VER_30 }, - { 0x7cf00000, 0x34a00000, RTL_GIGA_MAC_VER_09 }, - { 0x7cf00000, 0x24a00000, RTL_GIGA_MAC_VER_09 }, { 0x7cf00000, 0x34900000, RTL_GIGA_MAC_VER_08 }, { 0x7cf00000, 0x24900000, RTL_GIGA_MAC_VER_08 }, { 0x7cf00000, 0x34800000, RTL_GIGA_MAC_VER_07 }, @@ -2635,8 +2533,8 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, tp->mac_version = p->mac_version; if (tp->mac_version == RTL_GIGA_MAC_NONE) { - netif_notice(tp, probe, dev, - "unknown MAC, using family default\n"); + dev_notice(tp_to_dev(tp), + "unknown MAC, using family default\n"); tp->mac_version = default_version; } else if (tp->mac_version == RTL_GIGA_MAC_VER_42) { tp->mac_version = tp->mii.supports_gmii ? @@ -4685,18 +4583,7 @@ static void rtl_init_mdio_ops(struct rtl8169_private *tp) ops->write = r8168dp_2_mdio_write; ops->read = r8168dp_2_mdio_read; break; - case RTL_GIGA_MAC_VER_40: - case RTL_GIGA_MAC_VER_41: - case RTL_GIGA_MAC_VER_42: - case RTL_GIGA_MAC_VER_43: - case RTL_GIGA_MAC_VER_44: - case RTL_GIGA_MAC_VER_45: - case RTL_GIGA_MAC_VER_46: - case RTL_GIGA_MAC_VER_47: - case RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_49: - case RTL_GIGA_MAC_VER_50: - case RTL_GIGA_MAC_VER_51: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: ops->write = r8168g_mdio_write; ops->read = r8168g_mdio_read; break; @@ -4741,21 +4628,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_32: case RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_38: - case RTL_GIGA_MAC_VER_39: - case RTL_GIGA_MAC_VER_40: - case RTL_GIGA_MAC_VER_41: - case RTL_GIGA_MAC_VER_42: - case RTL_GIGA_MAC_VER_43: - case RTL_GIGA_MAC_VER_44: - case RTL_GIGA_MAC_VER_45: - case RTL_GIGA_MAC_VER_46: - case RTL_GIGA_MAC_VER_47: - case RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_49: - case RTL_GIGA_MAC_VER_50: - case RTL_GIGA_MAC_VER_51: + case RTL_GIGA_MAC_VER_37 ... 
RTL_GIGA_MAC_VER_51: RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | AcceptBroadcast | AcceptMulticast | AcceptMyPhys); break; @@ -4775,79 +4648,13 @@ static bool rtl_wol_pll_power_down(struct rtl8169_private *tp) return true; } -static void r810x_phy_power_down(struct rtl8169_private *tp) -{ - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, MII_BMCR, BMCR_PDOWN); -} - -static void r810x_phy_power_up(struct rtl8169_private *tp) -{ - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, MII_BMCR, BMCR_ANENABLE); -} - -static void r810x_pll_power_down(struct rtl8169_private *tp) -{ - if (rtl_wol_pll_power_down(tp)) - return; - - r810x_phy_power_down(tp); - - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_07: - case RTL_GIGA_MAC_VER_08: - case RTL_GIGA_MAC_VER_09: - case RTL_GIGA_MAC_VER_10: - case RTL_GIGA_MAC_VER_13: - case RTL_GIGA_MAC_VER_16: - break; - default: - RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); - break; - } -} - -static void r810x_pll_power_up(struct rtl8169_private *tp) -{ - r810x_phy_power_up(tp); - - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_07: - case RTL_GIGA_MAC_VER_08: - case RTL_GIGA_MAC_VER_09: - case RTL_GIGA_MAC_VER_10: - case RTL_GIGA_MAC_VER_13: - case RTL_GIGA_MAC_VER_16: - break; - case RTL_GIGA_MAC_VER_47: - case RTL_GIGA_MAC_VER_48: - RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0); - break; - default: - RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80); - break; - } -} - static void r8168_phy_power_up(struct rtl8169_private *tp) { rtl_writephy(tp, 0x1f, 0x0000); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_11: case RTL_GIGA_MAC_VER_12: - case RTL_GIGA_MAC_VER_17: - case RTL_GIGA_MAC_VER_18: - case RTL_GIGA_MAC_VER_19: - case RTL_GIGA_MAC_VER_20: - case RTL_GIGA_MAC_VER_21: - case RTL_GIGA_MAC_VER_22: - case RTL_GIGA_MAC_VER_23: - case RTL_GIGA_MAC_VER_24: - case RTL_GIGA_MAC_VER_25: - case RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_27: - case RTL_GIGA_MAC_VER_28: + case RTL_GIGA_MAC_VER_17 ... RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: rtl_writephy(tp, 0x0e, 0x0000); break; @@ -4870,18 +4677,7 @@ static void r8168_phy_power_down(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_11: case RTL_GIGA_MAC_VER_12: - case RTL_GIGA_MAC_VER_17: - case RTL_GIGA_MAC_VER_18: - case RTL_GIGA_MAC_VER_19: - case RTL_GIGA_MAC_VER_20: - case RTL_GIGA_MAC_VER_21: - case RTL_GIGA_MAC_VER_22: - case RTL_GIGA_MAC_VER_23: - case RTL_GIGA_MAC_VER_24: - case RTL_GIGA_MAC_VER_25: - case RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_27: - case RTL_GIGA_MAC_VER_28: + case RTL_GIGA_MAC_VER_17 ... RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: rtl_writephy(tp, 0x0e, 0x0200); default: @@ -4895,12 +4691,6 @@ static void r8168_pll_power_down(struct rtl8169_private *tp) if (r8168_check_dash(tp)) return; - if ((tp->mac_version == RTL_GIGA_MAC_VER_23 || - tp->mac_version == RTL_GIGA_MAC_VER_24) && - (RTL_R16(tp, CPlusCmd) & ASF)) { - return; - } - if (tp->mac_version == RTL_GIGA_MAC_VER_32 || tp->mac_version == RTL_GIGA_MAC_VER_33) rtl_ephy_write(tp, 0x19, 0xff64); @@ -4911,16 +4701,15 @@ static void r8168_pll_power_down(struct rtl8169_private *tp) r8168_phy_power_down(tp); switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25: - case RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_27: - case RTL_GIGA_MAC_VER_28: - case RTL_GIGA_MAC_VER_31: - case RTL_GIGA_MAC_VER_32: - case RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_25 ... 
RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_37: + case RTL_GIGA_MAC_VER_39: + case RTL_GIGA_MAC_VER_43: case RTL_GIGA_MAC_VER_44: case RTL_GIGA_MAC_VER_45: case RTL_GIGA_MAC_VER_46: + case RTL_GIGA_MAC_VER_47: + case RTL_GIGA_MAC_VER_48: case RTL_GIGA_MAC_VER_50: case RTL_GIGA_MAC_VER_51: RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); @@ -4938,18 +4727,17 @@ static void r8168_pll_power_down(struct rtl8169_private *tp) static void r8168_pll_power_up(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25: - case RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_27: - case RTL_GIGA_MAC_VER_28: - case RTL_GIGA_MAC_VER_31: - case RTL_GIGA_MAC_VER_32: - case RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_37: + case RTL_GIGA_MAC_VER_39: + case RTL_GIGA_MAC_VER_43: RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80); break; case RTL_GIGA_MAC_VER_44: case RTL_GIGA_MAC_VER_45: case RTL_GIGA_MAC_VER_46: + case RTL_GIGA_MAC_VER_47: + case RTL_GIGA_MAC_VER_48: case RTL_GIGA_MAC_VER_50: case RTL_GIGA_MAC_VER_51: RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0); @@ -4966,127 +4754,41 @@ static void r8168_pll_power_up(struct rtl8169_private *tp) r8168_phy_power_up(tp); } -static void rtl_generic_op(struct rtl8169_private *tp, - void (*op)(struct rtl8169_private *)) -{ - if (op) - op(tp); -} - static void rtl_pll_power_down(struct rtl8169_private *tp) { - rtl_generic_op(tp, tp->pll_power_ops.down); + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15: + break; + default: + r8168_pll_power_down(tp); + } } static void rtl_pll_power_up(struct rtl8169_private *tp) { - rtl_generic_op(tp, tp->pll_power_ops.up); -} - -static void rtl_init_pll_power_ops(struct rtl8169_private *tp) -{ - struct pll_power_ops *ops = &tp->pll_power_ops; - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_07: - case RTL_GIGA_MAC_VER_08: - case RTL_GIGA_MAC_VER_09: - case RTL_GIGA_MAC_VER_10: - case RTL_GIGA_MAC_VER_16: - case RTL_GIGA_MAC_VER_29: - case RTL_GIGA_MAC_VER_30: - case RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_39: - case RTL_GIGA_MAC_VER_43: - case RTL_GIGA_MAC_VER_47: - case RTL_GIGA_MAC_VER_48: - ops->down = r810x_pll_power_down; - ops->up = r810x_pll_power_up; - break; - - case RTL_GIGA_MAC_VER_11: - case RTL_GIGA_MAC_VER_12: - case RTL_GIGA_MAC_VER_17: - case RTL_GIGA_MAC_VER_18: - case RTL_GIGA_MAC_VER_19: - case RTL_GIGA_MAC_VER_20: - case RTL_GIGA_MAC_VER_21: - case RTL_GIGA_MAC_VER_22: - case RTL_GIGA_MAC_VER_23: - case RTL_GIGA_MAC_VER_24: - case RTL_GIGA_MAC_VER_25: - case RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_27: - case RTL_GIGA_MAC_VER_28: - case RTL_GIGA_MAC_VER_31: - case RTL_GIGA_MAC_VER_32: - case RTL_GIGA_MAC_VER_33: - case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_35: - case RTL_GIGA_MAC_VER_36: - case RTL_GIGA_MAC_VER_38: - case RTL_GIGA_MAC_VER_40: - case RTL_GIGA_MAC_VER_41: - case RTL_GIGA_MAC_VER_42: - case RTL_GIGA_MAC_VER_44: - case RTL_GIGA_MAC_VER_45: - case RTL_GIGA_MAC_VER_46: - case RTL_GIGA_MAC_VER_49: - case RTL_GIGA_MAC_VER_50: - case RTL_GIGA_MAC_VER_51: - ops->down = r8168_pll_power_down; - ops->up = r8168_pll_power_up; + case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + case RTL_GIGA_MAC_VER_13 ... 
RTL_GIGA_MAC_VER_15: break; - default: - ops->down = NULL; - ops->up = NULL; - break; + r8168_pll_power_up(tp); } } static void rtl_init_rxcfg(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01: - case RTL_GIGA_MAC_VER_02: - case RTL_GIGA_MAC_VER_03: - case RTL_GIGA_MAC_VER_04: - case RTL_GIGA_MAC_VER_05: - case RTL_GIGA_MAC_VER_06: - case RTL_GIGA_MAC_VER_10: - case RTL_GIGA_MAC_VER_11: - case RTL_GIGA_MAC_VER_12: - case RTL_GIGA_MAC_VER_13: - case RTL_GIGA_MAC_VER_14: - case RTL_GIGA_MAC_VER_15: - case RTL_GIGA_MAC_VER_16: - case RTL_GIGA_MAC_VER_17: + case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17: RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST); break; - case RTL_GIGA_MAC_VER_18: - case RTL_GIGA_MAC_VER_19: - case RTL_GIGA_MAC_VER_20: - case RTL_GIGA_MAC_VER_21: - case RTL_GIGA_MAC_VER_22: - case RTL_GIGA_MAC_VER_23: - case RTL_GIGA_MAC_VER_24: + case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24: case RTL_GIGA_MAC_VER_34: case RTL_GIGA_MAC_VER_35: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; - case RTL_GIGA_MAC_VER_40: - case RTL_GIGA_MAC_VER_41: - case RTL_GIGA_MAC_VER_42: - case RTL_GIGA_MAC_VER_43: - case RTL_GIGA_MAC_VER_44: - case RTL_GIGA_MAC_VER_45: - case RTL_GIGA_MAC_VER_46: - case RTL_GIGA_MAC_VER_47: - case RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_49: - case RTL_GIGA_MAC_VER_50: - case RTL_GIGA_MAC_VER_51: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); break; default: @@ -5102,16 +4804,20 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp) static void rtl_hw_jumbo_enable(struct rtl8169_private *tp) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); - rtl_generic_op(tp, tp->jumbo_ops.enable); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + if (tp->jumbo_ops.enable) { + RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + tp->jumbo_ops.enable(tp); + RTL_W8(tp, Cfg9346, Cfg9346_Lock); + } } static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); - rtl_generic_op(tp, tp->jumbo_ops.disable); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + if (tp->jumbo_ops.disable) { + RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + tp->jumbo_ops.disable(tp); + RTL_W8(tp, Cfg9346, Cfg9346_Lock); + } } static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp) @@ -5125,7 +4831,7 @@ static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp) { RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0); RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); } static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp) @@ -5151,7 +4857,7 @@ static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp) RTL_W8(tp, MaxTxPacketSize, 0x0c); RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0); RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); } static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp) @@ -5163,7 +4869,7 @@ static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp) static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp) { rtl_tx_performance_tweak(tp, - (0x5 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN); + PCI_EXP_DEVCTL_READRQ_4096B | PCI_EXP_DEVCTL_NOSNOOP_EN); } static void 
r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp) @@ -5223,18 +4929,7 @@ static void rtl_init_jumbo_ops(struct rtl8169_private *tp) * No action needed for jumbo frames with 8169. * No jumbo for 810x at all. */ - case RTL_GIGA_MAC_VER_40: - case RTL_GIGA_MAC_VER_41: - case RTL_GIGA_MAC_VER_42: - case RTL_GIGA_MAC_VER_43: - case RTL_GIGA_MAC_VER_44: - case RTL_GIGA_MAC_VER_45: - case RTL_GIGA_MAC_VER_46: - case RTL_GIGA_MAC_VER_47: - case RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_49: - case RTL_GIGA_MAC_VER_50: - case RTL_GIGA_MAC_VER_51: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: default: ops->disable = NULL; ops->enable = NULL; @@ -5320,32 +5015,21 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp) rtl_rx_close(tp); - if (tp->mac_version == RTL_GIGA_MAC_VER_27 || - tp->mac_version == RTL_GIGA_MAC_VER_28 || - tp->mac_version == RTL_GIGA_MAC_VER_31) { + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_27: + case RTL_GIGA_MAC_VER_28: + case RTL_GIGA_MAC_VER_31: rtl_udelay_loop_wait_low(tp, &rtl_npq_cond, 20, 42*42); - } else if (tp->mac_version == RTL_GIGA_MAC_VER_34 || - tp->mac_version == RTL_GIGA_MAC_VER_35 || - tp->mac_version == RTL_GIGA_MAC_VER_36 || - tp->mac_version == RTL_GIGA_MAC_VER_37 || - tp->mac_version == RTL_GIGA_MAC_VER_38 || - tp->mac_version == RTL_GIGA_MAC_VER_40 || - tp->mac_version == RTL_GIGA_MAC_VER_41 || - tp->mac_version == RTL_GIGA_MAC_VER_42 || - tp->mac_version == RTL_GIGA_MAC_VER_43 || - tp->mac_version == RTL_GIGA_MAC_VER_44 || - tp->mac_version == RTL_GIGA_MAC_VER_45 || - tp->mac_version == RTL_GIGA_MAC_VER_46 || - tp->mac_version == RTL_GIGA_MAC_VER_47 || - tp->mac_version == RTL_GIGA_MAC_VER_48 || - tp->mac_version == RTL_GIGA_MAC_VER_49 || - tp->mac_version == RTL_GIGA_MAC_VER_50 || - tp->mac_version == RTL_GIGA_MAC_VER_51) { + break; + case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq); rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666); - } else { + break; + default: RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq); udelay(100); + break; } rtl_hw_reset(tp); @@ -5358,13 +5042,10 @@ static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp) (InterFrameGap << TxInterFrameGapShift)); } -static void rtl_hw_start(struct net_device *dev) +static void rtl_set_rx_max_size(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - - tp->hw_start(dev); - - rtl_irq_enable_all(tp); + /* Low hurts. Let's disable the filtering. */ + RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1); } static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp) @@ -5380,19 +5061,23 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp) RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32)); } -static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp) +static void rtl_hw_start(struct rtl8169_private *tp) { - u16 cmd; + RTL_W8(tp, Cfg9346, Cfg9346_Unlock); - cmd = RTL_R16(tp, CPlusCmd); - RTL_W16(tp, CPlusCmd, cmd); - return cmd; -} + tp->hw_start(tp); -static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz) -{ - /* Low hurts. Let's disable the filtering. */ - RTL_W16(tp, RxMaxSize, rx_buf_sz + 1); + rtl_set_rx_max_size(tp); + rtl_set_rx_tx_desc_registers(tp); + rtl_set_rx_tx_config_registers(tp); + RTL_W8(tp, Cfg9346, Cfg9346_Lock); + + /* Initially a 10 us delay. Turned it into a PCI commit. 
- FR */ + RTL_R8(tp, IntrMask); + RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); + /* no early-rx interrupts */ + RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); + rtl_irq_enable_all(tp); } static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version) @@ -5472,36 +5157,14 @@ static void rtl_set_rx_mode(struct net_device *dev) RTL_W32(tp, RxConfig, tmp); } -static void rtl_hw_start_8169(struct net_device *dev) +static void rtl_hw_start_8169(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - struct pci_dev *pdev = tp->pci_dev; - - if (tp->mac_version == RTL_GIGA_MAC_VER_05) { - RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW); - pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08); - } - - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); - if (tp->mac_version == RTL_GIGA_MAC_VER_01 || - tp->mac_version == RTL_GIGA_MAC_VER_02 || - tp->mac_version == RTL_GIGA_MAC_VER_03 || - tp->mac_version == RTL_GIGA_MAC_VER_04) - RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); - - rtl_init_rxcfg(tp); + if (tp->mac_version == RTL_GIGA_MAC_VER_05) + pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08); RTL_W8(tp, EarlyTxThres, NoEarlyTx); - rtl_set_rx_max_size(tp, rx_buf_sz); - - if (tp->mac_version == RTL_GIGA_MAC_VER_01 || - tp->mac_version == RTL_GIGA_MAC_VER_02 || - tp->mac_version == RTL_GIGA_MAC_VER_03 || - tp->mac_version == RTL_GIGA_MAC_VER_04) - rtl_set_rx_tx_config_registers(tp); - - tp->cp_cmd |= rtl_rw_cpluscmd(tp) | PCIMulRW; + tp->cp_cmd |= PCIMulRW; if (tp->mac_version == RTL_GIGA_MAC_VER_02 || tp->mac_version == RTL_GIGA_MAC_VER_03) { @@ -5520,56 +5183,7 @@ static void rtl_hw_start_8169(struct net_device *dev) */ RTL_W16(tp, IntrMitigate, 0x0000); - rtl_set_rx_tx_desc_registers(tp); - - if (tp->mac_version != RTL_GIGA_MAC_VER_01 && - tp->mac_version != RTL_GIGA_MAC_VER_02 && - tp->mac_version != RTL_GIGA_MAC_VER_03 && - tp->mac_version != RTL_GIGA_MAC_VER_04) { - RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); - rtl_set_rx_tx_config_registers(tp); - } - - RTL_W8(tp, Cfg9346, Cfg9346_Lock); - - /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ - RTL_R8(tp, IntrMask); - RTL_W32(tp, RxMissed, 0); - - rtl_set_rx_mode(dev); - - /* no early-rx interrupts */ - RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); -} - -static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value) -{ - if (tp->csi_ops.write) - tp->csi_ops.write(tp, addr, value); -} - -static u32 rtl_csi_read(struct rtl8169_private *tp, int addr) -{ - return tp->csi_ops.read ? 
tp->csi_ops.read(tp, addr) : ~0; -} - -static void rtl_csi_access_enable(struct rtl8169_private *tp, u32 bits) -{ - u32 csi; - - csi = rtl_csi_read(tp, 0x070c) & 0x00ffffff; - rtl_csi_write(tp, 0x070c, csi | bits); -} - -static void rtl_csi_access_enable_1(struct rtl8169_private *tp) -{ - rtl_csi_access_enable(tp, 0x17000000); -} - -static void rtl_csi_access_enable_2(struct rtl8169_private *tp) -{ - rtl_csi_access_enable(tp, 0x27000000); } DECLARE_RTL_COND(rtl_csiar_cond) @@ -5577,101 +5191,55 @@ DECLARE_RTL_COND(rtl_csiar_cond) return RTL_R32(tp, CSIAR) & CSIAR_FLAG; } -static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value) -{ - RTL_W32(tp, CSIDR, value); - RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) | - CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT); - - rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100); -} - -static u32 r8169_csi_read(struct rtl8169_private *tp, int addr) +static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value) { - RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | - CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT); + u32 func = PCI_FUNC(tp->pci_dev->devfn); - return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ? - RTL_R32(tp, CSIDR) : ~0; -} - -static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value) -{ RTL_W32(tp, CSIDR, value); RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) | - CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT | - CSIAR_FUNC_NIC); + CSIAR_BYTE_ENABLE | func << 16); rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100); } -static u32 r8402_csi_read(struct rtl8169_private *tp, int addr) +static u32 rtl_csi_read(struct rtl8169_private *tp, int addr) { - RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC | - CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT); + u32 func = PCI_FUNC(tp->pci_dev->devfn); + + RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | func << 16 | + CSIAR_BYTE_ENABLE); return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ? RTL_R32(tp, CSIDR) : ~0; } -static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value) +static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val) { - RTL_W32(tp, CSIDR, value); - RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) | - CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT | - CSIAR_FUNC_NIC2); + struct pci_dev *pdev = tp->pci_dev; + u32 csi; - rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100); + /* According to Realtek the value at config space address 0x070f + * controls the L0s/L1 entrance latency. We try standard ECAM access + * first and if it fails fall back to CSI. + */ + if (pdev->cfg_size > 0x070f && + pci_write_config_byte(pdev, 0x070f, val) == PCIBIOS_SUCCESSFUL) + return; + + netdev_notice_once(tp->dev, + "No native access to PCI extended config space, falling back to CSI\n"); + csi = rtl_csi_read(tp, 0x070c) & 0x00ffffff; + rtl_csi_write(tp, 0x070c, csi | val << 24); } -static u32 r8411_csi_read(struct rtl8169_private *tp, int addr) +static void rtl_csi_access_enable_1(struct rtl8169_private *tp) { - RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 | - CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT); - - return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ? 
- RTL_R32(tp, CSIDR) : ~0; + rtl_csi_access_enable(tp, 0x17); } -static void rtl_init_csi_ops(struct rtl8169_private *tp) +static void rtl_csi_access_enable_2(struct rtl8169_private *tp) { - struct csi_ops *ops = &tp->csi_ops; - - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01: - case RTL_GIGA_MAC_VER_02: - case RTL_GIGA_MAC_VER_03: - case RTL_GIGA_MAC_VER_04: - case RTL_GIGA_MAC_VER_05: - case RTL_GIGA_MAC_VER_06: - case RTL_GIGA_MAC_VER_10: - case RTL_GIGA_MAC_VER_11: - case RTL_GIGA_MAC_VER_12: - case RTL_GIGA_MAC_VER_13: - case RTL_GIGA_MAC_VER_14: - case RTL_GIGA_MAC_VER_15: - case RTL_GIGA_MAC_VER_16: - case RTL_GIGA_MAC_VER_17: - ops->write = NULL; - ops->read = NULL; - break; - - case RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_38: - ops->write = r8402_csi_write; - ops->read = r8402_csi_read; - break; - - case RTL_GIGA_MAC_VER_44: - ops->write = r8411_csi_write; - ops->read = r8411_csi_read; - break; - - default: - ops->write = r8169_csi_write; - ops->read = r8169_csi_read; - break; - } + rtl_csi_access_enable(tp, 0x27); } struct ephy_info { @@ -5718,25 +5286,15 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable) RTL_W8(tp, Config3, data); } -#define R8168_CPCMD_QUIRK_MASK (\ - EnableBist | \ - Mac_dbgo_oe | \ - Force_half_dup | \ - Force_rxflow_en | \ - Force_txflow_en | \ - Cxpl_dbg_sel | \ - ASF | \ - PktCntrDisable | \ - Mac_dbgo_sel) - static void rtl_hw_start_8168bb(struct rtl8169_private *tp) { RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); - RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK); + tp->cp_cmd &= CPCMD_QUIRK_MASK; + RTL_W16(tp, CPlusCmd, tp->cp_cmd); if (tp->dev->mtu <= ETH_DATA_LEN) { - rtl_tx_performance_tweak(tp, (0x5 << MAX_READ_REQUEST_SHIFT) | + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B | PCI_EXP_DEVCTL_NOSNOOP_EN); } } @@ -5757,11 +5315,12 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp) RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); rtl_disable_clock_request(tp); - RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK); + tp->cp_cmd &= CPCMD_QUIRK_MASK; + RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp) @@ -5788,9 +5347,10 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp) RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK); + tp->cp_cmd &= CPCMD_QUIRK_MASK; + RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) @@ -5805,9 +5365,10 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) RTL_W8(tp, MaxTxPacketSize, TxPacketMax); if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK); + tp->cp_cmd &= CPCMD_QUIRK_MASK; + RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168c_1(struct rtl8169_private *tp) @@ -5862,9 +5423,10 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp) RTL_W8(tp, MaxTxPacketSize, TxPacketMax); if (tp->dev->mtu <= 
ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK); + tp->cp_cmd &= CPCMD_QUIRK_MASK; + RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168dp(struct rtl8169_private *tp) @@ -5872,7 +5434,7 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp) rtl_csi_access_enable_1(tp); if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); RTL_W8(tp, MaxTxPacketSize, TxPacketMax); @@ -5889,7 +5451,7 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp) rtl_csi_access_enable_1(tp); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); RTL_W8(tp, MaxTxPacketSize, TxPacketMax); @@ -5921,7 +5483,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1)); if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); RTL_W8(tp, MaxTxPacketSize, TxPacketMax); @@ -5946,7 +5508,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2)); if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); @@ -5976,7 +5538,7 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) { rtl_csi_access_enable_2(tp); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); @@ -6047,7 +5609,7 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp) rtl_csi_access_enable_1(tp); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC); rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC); @@ -6147,7 +5709,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_csi_access_enable_1(tp); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC); rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC); @@ -6229,7 +5791,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) rtl_csi_access_enable_1(tp); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC); rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC); @@ -6325,18 +5887,12 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp) r8168_mac_ocp_write(tp, 0xe860, data); } -static void rtl_hw_start_8168(struct net_device *dev) +static void rtl_hw_start_8168(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); - RTL_W8(tp, MaxTxPacketSize, 
TxPacketMax); - rtl_set_rx_max_size(tp, rx_buf_sz); - - tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1; - + tp->cp_cmd &= ~INTT_MASK; + tp->cp_cmd |= PktCntrDisable | INTT_1; RTL_W16(tp, CPlusCmd, tp->cp_cmd); RTL_W16(tp, IntrMitigate, 0x5151); @@ -6347,12 +5903,6 @@ static void rtl_hw_start_8168(struct net_device *dev) tp->event_slow &= ~RxOverflow; } - rtl_set_rx_tx_desc_registers(tp); - - rtl_set_rx_tx_config_registers(tp); - - RTL_R8(tp, IntrMask); - switch (tp->mac_version) { case RTL_GIGA_MAC_VER_11: rtl_hw_start_8168bb(tp); @@ -6453,30 +6003,11 @@ static void rtl_hw_start_8168(struct net_device *dev) default: printk(KERN_ERR PFX "%s: unknown chipset (mac_version = %d).\n", - dev->name, tp->mac_version); + tp->dev->name, tp->mac_version); break; } - - RTL_W8(tp, Cfg9346, Cfg9346_Lock); - - RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); - - rtl_set_rx_mode(dev); - - RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); } -#define R810X_CPCMD_QUIRK_MASK (\ - EnableBist | \ - Mac_dbgo_oe | \ - Force_half_dup | \ - Force_rxflow_en | \ - Force_txflow_en | \ - Cxpl_dbg_sel | \ - ASF | \ - PktCntrDisable | \ - Mac_dbgo_sel) - static void rtl_hw_start_8102e_1(struct rtl8169_private *tp) { static const struct ephy_info e_info_8102e_1[] = { @@ -6495,7 +6026,7 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp) RTL_W8(tp, DBG_REG, FIX_NAK_1); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); RTL_W8(tp, Config1, LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable); @@ -6512,7 +6043,7 @@ static void rtl_hw_start_8102e_2(struct rtl8169_private *tp) { rtl_csi_access_enable_2(tp); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable); RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); @@ -6575,7 +6106,7 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402)); - rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT); + rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00000002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00000006, ERIAR_EXGMAC); @@ -6600,32 +6131,21 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) rtl_pcie_state_l2l3_enable(tp, false); } -static void rtl_hw_start_8101(struct net_device *dev) +static void rtl_hw_start_8101(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - struct pci_dev *pdev = tp->pci_dev; - if (tp->mac_version >= RTL_GIGA_MAC_VER_30) tp->event_slow &= ~RxFIFOOver; if (tp->mac_version == RTL_GIGA_MAC_VER_13 || tp->mac_version == RTL_GIGA_MAC_VER_16) - pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, + pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_NOSNOOP_EN); - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - rtl_set_rx_max_size(tp, rx_buf_sz); - - tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK; + tp->cp_cmd &= CPCMD_QUIRK_MASK; RTL_W16(tp, CPlusCmd, tp->cp_cmd); - rtl_set_rx_tx_desc_registers(tp); - - rtl_set_rx_tx_config_registers(tp); - switch (tp->mac_version) { case RTL_GIGA_MAC_VER_07: rtl_hw_start_8102e_1(tp); @@ -6662,17 +6182,7 @@ static void rtl_hw_start_8101(struct net_device *dev) break; } - RTL_W8(tp, Cfg9346, Cfg9346_Lock); - RTL_W16(tp, IntrMitigate, 0x0000); - - RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); 
- - rtl_set_rx_mode(dev); - - RTL_R8(tp, IntrMask); - - RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); } static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) @@ -6699,29 +6209,22 @@ static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc) static void rtl8169_free_rx_databuff(struct rtl8169_private *tp, void **data_buff, struct RxDesc *desc) { - dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), rx_buf_sz, - DMA_FROM_DEVICE); + dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), + R8169_RX_BUF_SIZE, DMA_FROM_DEVICE); kfree(*data_buff); *data_buff = NULL; rtl8169_make_unusable_by_asic(desc); } -static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz) +static inline void rtl8169_mark_to_asic(struct RxDesc *desc) { u32 eor = le32_to_cpu(desc->opts1) & RingEnd; /* Force memory writes to complete before releasing descriptor */ dma_wmb(); - desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz); -} - -static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping, - u32 rx_buf_sz) -{ - desc->addr = cpu_to_le64(mapping); - rtl8169_mark_to_asic(desc, rx_buf_sz); + desc->opts1 = cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE); } static inline void *rtl8169_align(void *data) @@ -6735,21 +6238,20 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp, void *data; dma_addr_t mapping; struct device *d = tp_to_dev(tp); - struct net_device *dev = tp->dev; - int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; + int node = dev_to_node(d); - data = kmalloc_node(rx_buf_sz, GFP_KERNEL, node); + data = kmalloc_node(R8169_RX_BUF_SIZE, GFP_KERNEL, node); if (!data) return NULL; if (rtl8169_align(data) != data) { kfree(data); - data = kmalloc_node(rx_buf_sz + 15, GFP_KERNEL, node); + data = kmalloc_node(R8169_RX_BUF_SIZE + 15, GFP_KERNEL, node); if (!data) return NULL; } - mapping = dma_map_single(d, rtl8169_align(data), rx_buf_sz, + mapping = dma_map_single(d, rtl8169_align(data), R8169_RX_BUF_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(d, mapping))) { if (net_ratelimit()) @@ -6757,7 +6259,8 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp, goto err_out; } - rtl8169_map_to_asic(desc, mapping, rx_buf_sz); + desc->addr = cpu_to_le64(mapping); + rtl8169_mark_to_asic(desc); return data; err_out: @@ -6789,9 +6292,6 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp) for (i = 0; i < NUM_RX_DESC; i++) { void *data; - if (tp->Rx_databuff[i]) - continue; - data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i); if (!data) { rtl8169_make_unusable_by_asic(tp->RxDescArray + i); @@ -6808,14 +6308,12 @@ err_out: return -ENOMEM; } -static int rtl8169_init_ring(struct net_device *dev) +static int rtl8169_init_ring(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); - rtl8169_init_ring_indexes(tp); - memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info)); - memset(tp->Rx_databuff, 0x0, NUM_RX_DESC * sizeof(void *)); + memset(tp->tx_skb, 0, sizeof(tp->tx_skb)); + memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff)); return rtl8169_rx_fill(tp); } @@ -6874,13 +6372,13 @@ static void rtl_reset_work(struct rtl8169_private *tp) rtl8169_hw_reset(tp); for (i = 0; i < NUM_RX_DESC; i++) - rtl8169_mark_to_asic(tp->RxDescArray + i, rx_buf_sz); + rtl8169_mark_to_asic(tp->RxDescArray + i); rtl8169_tx_clear(tp); rtl8169_init_ring_indexes(tp); napi_enable(&tp->napi); - rtl_hw_start(dev); + rtl_hw_start(tp); netif_wake_queue(dev); rtl8169_check_link_status(dev, 
tp); } @@ -7012,18 +6510,6 @@ static int msdn_giant_send_check(struct sk_buff *skb) return ret; } -static inline __be16 get_protocol(struct sk_buff *skb) -{ - __be16 protocol; - - if (skb->protocol == htons(ETH_P_8021Q)) - protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; - else - protocol = skb->protocol; - - return protocol; -} - static bool rtl8169_tso_csum_v1(struct rtl8169_private *tp, struct sk_buff *skb, u32 *opts) { @@ -7060,7 +6546,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, return false; } - switch (get_protocol(skb)) { + switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): opts[0] |= TD1_GTSENV4; break; @@ -7092,7 +6578,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, return false; } - switch (get_protocol(skb)) { + switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): opts[1] |= TD1_IPv4_CS; ip_protocol = ip_hdr(skb)->protocol; @@ -7362,7 +6848,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data, prefetch(data); skb = napi_alloc_skb(&tp->napi, pkt_size); if (skb) - memcpy(skb->data, data, pkt_size); + skb_copy_to_linear_data(skb, data, pkt_size); dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE); return skb; @@ -7380,7 +6866,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget struct RxDesc *desc = tp->RxDescArray + entry; u32 status; - status = le32_to_cpu(desc->opts1) & tp->opts1_mask; + status = le32_to_cpu(desc->opts1); if (status & DescOwn) break; @@ -7398,14 +6884,16 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget dev->stats.rx_length_errors++; if (status & RxCRC) dev->stats.rx_crc_errors++; - if (status & RxFOVF) { + /* RxFOVF is a reserved bit on later chip versions */ + if (tp->mac_version == RTL_GIGA_MAC_VER_01 && + status & RxFOVF) { rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); dev->stats.rx_fifo_errors++; - } - if ((status & (RxRUNT | RxCRC)) && - !(status & (RxRWT | RxFOVF)) && - (dev->features & NETIF_F_RXALL)) + } else if (status & (RxRUNT | RxCRC) && + !(status & RxRWT) && + dev->features & NETIF_F_RXALL) { goto process_pkt; + } } else { struct sk_buff *skb; dma_addr_t addr; @@ -7454,7 +6942,7 @@ process_pkt: } release_descriptor: desc->opts2 = 0; - rtl8169_mark_to_asic(desc, rx_buf_sz); + rtl8169_mark_to_asic(desc); } count = cur_rx - tp->cur_rx; @@ -7465,8 +6953,7 @@ release_descriptor: static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) { - struct net_device *dev = dev_instance; - struct rtl8169_private *tp = netdev_priv(dev); + struct rtl8169_private *tp = dev_instance; int handled = 0; u16 status; @@ -7477,7 +6964,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) handled = 1; rtl_irq_disable(tp); - napi_schedule(&tp->napi); + napi_schedule_irqoff(&tp->napi); } } return IRQ_RETVAL(handled); @@ -7628,7 +7115,7 @@ static int rtl8169_close(struct net_device *dev) pm_runtime_get_sync(&pdev->dev); /* Update counters before going down */ - rtl8169_update_counters(dev); + rtl8169_update_counters(tp); rtl_lock_work(tp); clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); @@ -7638,7 +7125,7 @@ static int rtl8169_close(struct net_device *dev) cancel_work_sync(&tp->wk.work); - pci_free_irq(pdev, 0, dev); + pci_free_irq(pdev, 0, tp); dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, tp->RxPhyAddr); @@ -7683,7 +7170,7 @@ static int rtl_open(struct net_device *dev) if (!tp->RxDescArray) goto err_free_tx_0; - retval = rtl8169_init_ring(dev); + retval = rtl8169_init_ring(tp); if 
(retval < 0)
 goto err_free_rx_1;
@@ -7693,7 +7180,7 @@ static int rtl_open(struct net_device *dev)
 rtl_request_firmware(tp);
- retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+ retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp,
 dev->name);
 if (retval < 0)
 goto err_release_fw_2;
@@ -7706,13 +7193,11 @@ static int rtl_open(struct net_device *dev)
 rtl8169_init_phy(dev, tp);
- __rtl8169_set_features(dev, dev->features);
-
 rtl_pll_power_up(tp);
- rtl_hw_start(dev);
+ rtl_hw_start(tp);
- if (!rtl8169_init_counter_offsets(dev))
+ if (!rtl8169_init_counter_offsets(tp))
 netif_warn(tp, hw, dev, "counter reset/update failed\n");
 netif_start_queue(dev);
@@ -7781,7 +7266,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 * from tally counters.
 */
 if (pm_runtime_active(&pdev->dev))
- rtl8169_update_counters(dev);
+ rtl8169_update_counters(tp);
 /*
 * Subtract values fetched during initialization.
@@ -7877,7 +7362,7 @@ static int rtl8169_runtime_suspend(struct device *device)
 /* Update counters before going runtime suspend */
 rtl8169_rx_missed(dev);
- rtl8169_update_counters(dev);
+ rtl8169_update_counters(tp);
 return 0;
 }
@@ -8017,9 +7502,7 @@ static const struct net_device_ops rtl_netdev_ops = {
 };
 static const struct rtl_cfg_info {
- void (*hw_start)(struct net_device *);
- unsigned int region;
- unsigned int align;
+ void (*hw_start)(struct rtl8169_private *tp);
 u16 event_slow;
 unsigned int has_gmii:1;
 const struct rtl_coalesce_info *coalesce_info;
@@ -8027,8 +7510,6 @@ static const struct rtl_cfg_info {
 } rtl_cfg_infos [] = {
 [RTL_CFG_0] = {
 .hw_start = rtl_hw_start_8169,
- .region = 1,
- .align = 0,
 .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
 .has_gmii = 1,
 .coalesce_info = rtl_coalesce_info_8169,
@@ -8036,8 +7517,6 @@ static const struct rtl_cfg_info {
 },
 [RTL_CFG_1] = {
 .hw_start = rtl_hw_start_8168,
- .region = 2,
- .align = 8,
 .event_slow = SYSErr | LinkChg | RxOverflow,
 .has_gmii = 1,
 .coalesce_info = rtl_coalesce_info_8168_8136,
@@ -8045,8 +7524,6 @@ static const struct rtl_cfg_info {
 },
 [RTL_CFG_2] = {
 .hw_start = rtl_hw_start_8101,
- .region = 2,
- .align = 8,
 .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
 PCSTimeout,
 .coalesce_info = rtl_coalesce_info_8168_8136,
@@ -8122,20 +7599,10 @@ static void rtl_hw_init_8168ep(struct rtl8169_private *tp)
 static void rtl_hw_initialize(struct rtl8169_private *tp)
 {
 switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48:
 rtl_hw_init_8168g(tp);
 break;
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51:
 rtl_hw_init_8168ep(tp);
 break;
 default:
@@ -8146,11 +7613,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
- const unsigned int region = cfg->region;
 struct rtl8169_private *tp;
 struct mii_if_info *mii;
 struct net_device *dev;
- int chipset, i;
+ int chipset, region, i;
 int rc;
 if (netif_msg_drv(&debug)) {
@@ -8185,43 +7651,41 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 /* enable device (incl.
PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); if (rc < 0) { - netif_err(tp, probe, dev, "enable failure\n"); + dev_err(&pdev->dev, "enable failure\n"); return rc; } if (pcim_set_mwi(pdev) < 0) - netif_info(tp, probe, dev, "Mem-Wr-Inval unavailable\n"); + dev_info(&pdev->dev, "Mem-Wr-Inval unavailable\n"); - /* make sure PCI base addr 1 is MMIO */ - if (!(pci_resource_flags(pdev, region) & IORESOURCE_MEM)) { - netif_err(tp, probe, dev, - "region #%d not an MMIO resource, aborting\n", - region); + /* use first MMIO region */ + region = ffs(pci_select_bars(pdev, IORESOURCE_MEM)) - 1; + if (region < 0) { + dev_err(&pdev->dev, "no MMIO resource found\n"); return -ENODEV; } /* check for weird/broken PCI region reporting */ if (pci_resource_len(pdev, region) < R8169_REGS_SIZE) { - netif_err(tp, probe, dev, - "Invalid PCI region size(s), aborting\n"); + dev_err(&pdev->dev, "Invalid PCI region size(s), aborting\n"); return -ENODEV; } rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME); if (rc < 0) { - netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n"); + dev_err(&pdev->dev, "cannot remap MMIO, aborting\n"); return rc; } tp->mmio_addr = pcim_iomap_table(pdev)[region]; if (!pci_is_pcie(pdev)) - netif_info(tp, probe, dev, "not PCI Express\n"); + dev_info(&pdev->dev, "not PCI Express\n"); /* Identify chip attached to board */ - rtl8169_get_mac_version(tp, dev, cfg->default_ver); + rtl8169_get_mac_version(tp, cfg->default_ver); - tp->cp_cmd = 0; + tp->cp_cmd = RTL_R16(tp, CPlusCmd); if ((sizeof(dma_addr_t) > 4) && (use_dac == 1 || (use_dac == -1 && pci_is_pcie(pdev) && @@ -8236,7 +7700,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } else { rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (rc < 0) { - netif_err(tp, probe, dev, "DMA configuration failed\n"); + dev_err(&pdev->dev, "DMA configuration failed\n"); return rc; } } @@ -8254,18 +7718,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); rtl_init_mdio_ops(tp); - rtl_init_pll_power_ops(tp); rtl_init_jumbo_ops(tp); - rtl_init_csi_ops(tp); rtl8169_print_mac_version(tp); chipset = tp->mac_version; - tp->txd_version = rtl_chip_infos[chipset].txd_version; rc = rtl_alloc_irq(tp); if (rc < 0) { - netif_err(tp, probe, dev, "Can't allocate interrupt\n"); + dev_err(&pdev->dev, "Can't allocate interrupt\n"); return rc; } @@ -8293,29 +7754,18 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) u64_stats_init(&tp->tx_stats.syncp); /* Get MAC address */ - if (tp->mac_version == RTL_GIGA_MAC_VER_35 || - tp->mac_version == RTL_GIGA_MAC_VER_36 || - tp->mac_version == RTL_GIGA_MAC_VER_37 || - tp->mac_version == RTL_GIGA_MAC_VER_38 || - tp->mac_version == RTL_GIGA_MAC_VER_40 || - tp->mac_version == RTL_GIGA_MAC_VER_41 || - tp->mac_version == RTL_GIGA_MAC_VER_42 || - tp->mac_version == RTL_GIGA_MAC_VER_43 || - tp->mac_version == RTL_GIGA_MAC_VER_44 || - tp->mac_version == RTL_GIGA_MAC_VER_45 || - tp->mac_version == RTL_GIGA_MAC_VER_46 || - tp->mac_version == RTL_GIGA_MAC_VER_47 || - tp->mac_version == RTL_GIGA_MAC_VER_48 || - tp->mac_version == RTL_GIGA_MAC_VER_49 || - tp->mac_version == RTL_GIGA_MAC_VER_50 || - tp->mac_version == RTL_GIGA_MAC_VER_51) { - u16 mac_addr[3]; - + switch (tp->mac_version) { + u8 mac_addr[ETH_ALEN] __aligned(4); + case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38: + case RTL_GIGA_MAC_VER_40 ... 
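/*
 * Sketch of the BAR discovery used above: pci_select_bars() returns a bit
 * mask of BARs matching the requested resource type, so ffs() - 1 yields the
 * index of the first MMIO BAR and replaces the per-chip-family "region"
 * constant that was dropped from rtl_cfg_info.
 */
static int foo_first_mmio_bar(struct pci_dev *pdev)
{
	int region = ffs(pci_select_bars(pdev, IORESOURCE_MEM)) - 1;

	return region < 0 ? -ENODEV : region;	/* ffs() == 0: no MMIO BAR at all */
}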
RTL_GIGA_MAC_VER_51: *(u32 *)&mac_addr[0] = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC); - *(u16 *)&mac_addr[2] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC); + *(u16 *)&mac_addr[4] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC); - if (is_valid_ether_addr((u8 *)mac_addr)) - rtl_rar_set(tp, (u8 *)mac_addr); + if (is_valid_ether_addr(mac_addr)) + rtl_rar_set(tp, mac_addr); + break; + default: + break; } for (i = 0; i < ETH_ALEN; i++) dev->dev_addr[i] = RTL_R8(tp, MAC0 + i); @@ -8323,7 +7773,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &rtl8169_ethtool_ops; dev->watchdog_timeo = RTL8169_TX_TIMEOUT; - netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT); + netif_napi_add(dev, &tp->napi, rtl8169_poll, NAPI_POLL_WEIGHT); /* don't enable SG, IP_CSUM and TSO by default - it might not work * properly for all devices */ @@ -8346,13 +7796,17 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Disallow toggling */ dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; - if (tp->txd_version == RTL_TD_0) + switch (rtl_chip_infos[chipset].txd_version) { + case RTL_TD_0: tp->tso_csum = rtl8169_tso_csum_v1; - else if (tp->txd_version == RTL_TD_1) { + break; + case RTL_TD_1: tp->tso_csum = rtl8169_tso_csum_v2; dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6; - } else + break; + default: WARN_ON_ONCE(1); + } dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; @@ -8365,9 +7819,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->event_slow = cfg->event_slow; tp->coalesce_info = cfg->coalesce_info; - tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ? - ~(RxBOVF | RxFOVF) : ~0; - timer_setup(&tp->timer, rtl8169_phy_timer, 0); tp->rtl_fw = RTL_FIRMWARE_UNKNOWN; @@ -8384,15 +7835,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc < 0) return rc; - netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n", - rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr, - (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff), + netif_info(tp, probe, dev, "%s, %pM, XID %08x, IRQ %d\n", + rtl_chip_infos[chipset].name, dev->dev_addr, + (u32)(RTL_R32(tp, TxConfig) & 0xfcf0f8ff), pci_irq_vector(pdev, 0)); if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) { netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, " "tx checksumming: %s]\n", rtl_chip_infos[chipset].jumbo_max, - rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko"); + tp->mac_version <= RTL_GIGA_MAC_VER_06 ? 
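/*
 * Sketch of the MAC-address read above: the address arrives as one 32-bit
 * and one 16-bit ERI read, so the destination is a six-byte array with
 * 4-byte alignment (the u32 store at offset 0 must be naturally aligned)
 * and the 16-bit half lands at byte offset 4.
 */
static void foo_read_mac(struct rtl8169_private *tp)
{
	u8 mac_addr[ETH_ALEN] __aligned(4);

	*(u32 *)&mac_addr[0] = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC);	/* bytes 0-3 */
	*(u16 *)&mac_addr[4] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC);	/* bytes 4-5 */

	if (is_valid_ether_addr(mac_addr))
		rtl_rar_set(tp, mac_addr);
}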
"ok" : "ko"); } if (r8168_check_dash(tp)) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index b6b90a6314e3..5970d9e5ddf1 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -442,12 +442,22 @@ static void sh_eth_modify(struct net_device *ndev, int enum_index, u32 clear, static void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data, int enum_index) { - iowrite32(data, mdp->tsu_addr + mdp->reg_offset[enum_index]); + u16 offset = mdp->reg_offset[enum_index]; + + if (WARN_ON(offset == SH_ETH_OFFSET_INVALID)) + return; + + iowrite32(data, mdp->tsu_addr + offset); } static u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index) { - return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]); + u16 offset = mdp->reg_offset[enum_index]; + + if (WARN_ON(offset == SH_ETH_OFFSET_INVALID)) + return ~0U; + + return ioread32(mdp->tsu_addr + offset); } static void sh_eth_select_mii(struct net_device *ndev) @@ -2610,12 +2620,6 @@ static int sh_eth_change_mtu(struct net_device *ndev, int new_mtu) } /* For TSU_POSTn. Please refer to the manual about this (strange) bitfields */ -static void *sh_eth_tsu_get_post_reg_offset(struct sh_eth_private *mdp, - int entry) -{ - return sh_eth_tsu_get_offset(mdp, TSU_POST1) + (entry / 8 * 4); -} - static u32 sh_eth_tsu_get_post_mask(int entry) { return 0x0f << (28 - ((entry % 8) * 4)); @@ -2630,27 +2634,25 @@ static void sh_eth_tsu_enable_cam_entry_post(struct net_device *ndev, int entry) { struct sh_eth_private *mdp = netdev_priv(ndev); + int reg = TSU_POST1 + entry / 8; u32 tmp; - void *reg_offset; - reg_offset = sh_eth_tsu_get_post_reg_offset(mdp, entry); - tmp = ioread32(reg_offset); - iowrite32(tmp | sh_eth_tsu_get_post_bit(mdp, entry), reg_offset); + tmp = sh_eth_tsu_read(mdp, reg); + sh_eth_tsu_write(mdp, tmp | sh_eth_tsu_get_post_bit(mdp, entry), reg); } static bool sh_eth_tsu_disable_cam_entry_post(struct net_device *ndev, int entry) { struct sh_eth_private *mdp = netdev_priv(ndev); + int reg = TSU_POST1 + entry / 8; u32 post_mask, ref_mask, tmp; - void *reg_offset; - reg_offset = sh_eth_tsu_get_post_reg_offset(mdp, entry); post_mask = sh_eth_tsu_get_post_mask(entry); ref_mask = sh_eth_tsu_get_post_bit(mdp, entry) & ~post_mask; - tmp = ioread32(reg_offset); - iowrite32(tmp & ~post_mask, reg_offset); + tmp = sh_eth_tsu_read(mdp, reg); + sh_eth_tsu_write(mdp, tmp & ~post_mask, reg); /* If other port enables, the function returns "true" */ return tmp & ref_mask; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 056cb6093630..152d6948611c 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2783,6 +2783,8 @@ static int rocker_switchdev_event(struct notifier_block *unused, switch (event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */ case SWITCHDEV_FDB_DEL_TO_DEVICE: + if (!fdb_info->added_by_user) + break; memcpy(&switchdev_work->fdb_info, ptr, sizeof(switchdev_work->fdb_info)); switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index a4ebd8715494..88c1eee677c2 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1551,6 +1551,38 @@ static int efx_probe_interrupts(struct efx_nic *efx) return 0; } +#if defined(CONFIG_SMP) +static void efx_set_interrupt_affinity(struct efx_nic *efx) +{ + struct efx_channel *channel; + unsigned int 
cpu; + + efx_for_each_channel(channel, efx) { + cpu = cpumask_local_spread(channel->channel, + pcibus_to_node(efx->pci_dev->bus)); + irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); + } +} + +static void efx_clear_interrupt_affinity(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + irq_set_affinity_hint(channel->irq, NULL); +} +#else +static void +efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) +{ +} + +static void +efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) +{ +} +#endif /* CONFIG_SMP */ + static int efx_soft_enable_interrupts(struct efx_nic *efx) { struct efx_channel *channel, *end_channel; @@ -3308,6 +3340,7 @@ static void efx_pci_remove_main(struct efx_nic *efx) cancel_work_sync(&efx->reset_work); efx_disable_interrupts(efx); + efx_clear_interrupt_affinity(efx); efx_nic_fini_interrupt(efx); efx_fini_port(efx); efx->type->fini(efx); @@ -3457,6 +3490,8 @@ static int efx_pci_probe_main(struct efx_nic *efx) rc = efx_nic_init_interrupt(efx); if (rc) goto fail5; + + efx_set_interrupt_affinity(efx); rc = efx_enable_interrupts(efx); if (rc) goto fail6; @@ -3464,6 +3499,7 @@ static int efx_pci_probe_main(struct efx_nic *efx) return 0; fail6: + efx_clear_interrupt_affinity(efx); efx_nic_fini_interrupt(efx); fail5: efx_fini_port(efx); diff --git a/drivers/net/ethernet/socionext/Kconfig b/drivers/net/ethernet/socionext/Kconfig index 6bcfe27fc560..b80048ca82a0 100644 --- a/drivers/net/ethernet/socionext/Kconfig +++ b/drivers/net/ethernet/socionext/Kconfig @@ -14,6 +14,8 @@ if NET_VENDOR_SOCIONEXT config SNI_AVE tristate "Socionext AVE ethernet support" depends on (ARCH_UNIPHIER || COMPILE_TEST) && OF + depends on HAS_IOMEM + select MFD_SYSCON select PHYLIB ---help--- Driver for gigabit ethernet MACs, called AVE, in the diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index f4c0b02ddad8..aa50331b7607 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1057,7 +1057,8 @@ static int netsec_netdev_load_microcode(struct netsec_priv *priv) return 0; } -static int netsec_reset_hardware(struct netsec_priv *priv) +static int netsec_reset_hardware(struct netsec_priv *priv, + bool load_ucode) { u32 value; int err; @@ -1102,11 +1103,14 @@ static int netsec_reset_hardware(struct netsec_priv *priv) netsec_write(priv, NETSEC_REG_NRM_RX_CONFIG, 1 << NETSEC_REG_DESC_ENDIAN); - err = netsec_netdev_load_microcode(priv); - if (err) { - netif_err(priv, probe, priv->ndev, - "%s: failed to load microcode (%d)\n", __func__, err); - return err; + if (load_ucode) { + err = netsec_netdev_load_microcode(priv); + if (err) { + netif_err(priv, probe, priv->ndev, + "%s: failed to load microcode (%d)\n", + __func__, err); + return err; + } } /* start DMA engines */ @@ -1313,8 +1317,8 @@ static int netsec_netdev_open(struct net_device *ndev) napi_enable(&priv->napi); netif_start_queue(ndev); - /* Enable RX intr. */ - netsec_write(priv, NETSEC_REG_INTEN_SET, NETSEC_IRQ_RX); + /* Enable TX+RX intr. 
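/*
 * Sketch of the affinity policy added above for sfc: cpumask_local_spread(i,
 * node) picks the i-th CPU, preferring CPUs on the given NUMA node, so
 * successive channels are hinted onto distinct node-local CPUs.  Generic
 * form for a driver with an array of per-channel IRQ numbers (hypothetical):
 */
static void foo_spread_irqs(struct pci_dev *pdev, const int *irqs, int nirqs)
{
	int node = pcibus_to_node(pdev->bus);
	int i;

	for (i = 0; i < nirqs; i++)
		irq_set_affinity_hint(irqs[i],
				      cpumask_of(cpumask_local_spread(i, node)));
}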
*/ + netsec_write(priv, NETSEC_REG_INTEN_SET, NETSEC_IRQ_RX | NETSEC_IRQ_TX); return 0; err3: @@ -1328,6 +1332,7 @@ err1: static int netsec_netdev_stop(struct net_device *ndev) { + int ret; struct netsec_priv *priv = netdev_priv(ndev); netif_stop_queue(priv->ndev); @@ -1343,12 +1348,14 @@ static int netsec_netdev_stop(struct net_device *ndev) netsec_uninit_pkt_dring(priv, NETSEC_RING_TX); netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); + ret = netsec_reset_hardware(priv, false); + phy_stop(ndev->phydev); phy_disconnect(ndev->phydev); pm_runtime_put_sync(priv->dev); - return 0; + return ret; } static int netsec_netdev_init(struct net_device *ndev) @@ -1364,7 +1371,7 @@ static int netsec_netdev_init(struct net_device *ndev) if (ret) goto err1; - ret = netsec_reset_hardware(priv); + ret = netsec_reset_hardware(priv, true); if (ret) goto err2; diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 0b3b7a460641..f7ecceeb1e28 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -11,6 +11,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/iopoll.h> +#include <linux/mfd/syscon.h> #include <linux/mii.h> #include <linux/module.h> #include <linux/netdevice.h> @@ -18,6 +19,7 @@ #include <linux/of_mdio.h> #include <linux/of_platform.h> #include <linux/phy.h> +#include <linux/regmap.h> #include <linux/reset.h> #include <linux/types.h> #include <linux/u64_stats_sync.h> @@ -197,8 +199,16 @@ #define AVE_INTM_COUNT 20 #define AVE_FORCE_TXINTCNT 1 +/* SG */ +#define SG_ETPINMODE 0x540 +#define SG_ETPINMODE_EXTPHY BIT(1) /* for LD11 */ +#define SG_ETPINMODE_RMII(ins) BIT(ins) + #define IS_DESC_64BIT(p) ((p)->data->is_desc_64bit) +#define AVE_MAX_CLKS 4 +#define AVE_MAX_RSTS 2 + enum desc_id { AVE_DESCID_RX, AVE_DESCID_TX, @@ -225,10 +235,6 @@ struct ave_desc_info { struct ave_desc *desc; /* skb info related descriptor */ }; -struct ave_soc_data { - bool is_desc_64bit; -}; - struct ave_stats { struct u64_stats_sync syncp; u64 packets; @@ -245,11 +251,16 @@ struct ave_private { int phy_id; unsigned int desc_size; u32 msg_enable; - struct clk *clk; - struct reset_control *rst; + int nclks; + struct clk *clk[AVE_MAX_CLKS]; + int nrsts; + struct reset_control *rst[AVE_MAX_RSTS]; phy_interface_t phy_mode; struct phy_device *phydev; struct mii_bus *mdio; + struct regmap *regmap; + unsigned int pinmode_mask; + unsigned int pinmode_val; /* stats */ struct ave_stats stats_rx; @@ -272,6 +283,14 @@ struct ave_private { const struct ave_soc_data *data; }; +struct ave_soc_data { + bool is_desc_64bit; + const char *clock_names[AVE_MAX_CLKS]; + const char *reset_names[AVE_MAX_RSTS]; + int (*get_pinmode)(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg); +}; + static u32 ave_desc_read(struct net_device *ndev, enum desc_id id, int entry, int offset) { @@ -1153,20 +1172,30 @@ static int ave_init(struct net_device *ndev) struct device_node *np = dev->of_node; struct device_node *mdio_np; struct phy_device *phydev; - int ret; + int nc, nr, ret; /* enable clk because of hw access until ndo_open */ - ret = clk_prepare_enable(priv->clk); - if (ret) { - dev_err(dev, "can't enable clock\n"); - return ret; + for (nc = 0; nc < priv->nclks; nc++) { + ret = clk_prepare_enable(priv->clk[nc]); + if (ret) { + dev_err(dev, "can't enable clock\n"); + goto out_clk_disable; + } } - ret = reset_control_deassert(priv->rst); - if (ret) { - dev_err(dev, "can't deassert reset\n"); - goto out_clk_disable; + + for (nr = 0; 
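/*
 * Sketch of the netsec load_ucode call sites above: the microcode image only
 * has to be downloaded when the interface is first initialised, so the reset
 * issued from ndo_stop passes "false" and keeps the already-loaded image.
 * Hypothetical stop-path wrapper:
 */
static int foo_stop_reset(struct netsec_priv *priv)
{
	/* quiesce the DMA engines; skip the microcode download step */
	return netsec_reset_hardware(priv, false);
}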
nr < priv->nrsts; nr++) { + ret = reset_control_deassert(priv->rst[nr]); + if (ret) { + dev_err(dev, "can't deassert reset\n"); + goto out_reset_assert; + } } + ret = regmap_update_bits(priv->regmap, SG_ETPINMODE, + priv->pinmode_mask, priv->pinmode_val); + if (ret) + return ret; + ave_global_reset(ndev); mdio_np = of_get_child_by_name(np, "mdio"); @@ -1207,9 +1236,11 @@ static int ave_init(struct net_device *ndev) out_mdio_unregister: mdiobus_unregister(priv->mdio); out_reset_assert: - reset_control_assert(priv->rst); + while (--nr >= 0) + reset_control_assert(priv->rst[nr]); out_clk_disable: - clk_disable_unprepare(priv->clk); + while (--nc >= 0) + clk_disable_unprepare(priv->clk[nc]); return ret; } @@ -1217,13 +1248,16 @@ out_clk_disable: static void ave_uninit(struct net_device *ndev) { struct ave_private *priv = netdev_priv(ndev); + int i; phy_disconnect(priv->phydev); mdiobus_unregister(priv->mdio); /* disable clk because of hw access after ndo_stop */ - reset_control_assert(priv->rst); - clk_disable_unprepare(priv->clk); + for (i = 0; i < priv->nrsts; i++) + reset_control_assert(priv->rst[i]); + for (i = 0; i < priv->nclks; i++) + clk_disable_unprepare(priv->clk[i]); } static int ave_open(struct net_device *ndev) @@ -1520,6 +1554,7 @@ static int ave_probe(struct platform_device *pdev) const struct ave_soc_data *data; struct device *dev = &pdev->dev; char buf[ETHTOOL_FWVERS_LEN]; + struct of_phandle_args args; phy_interface_t phy_mode; struct ave_private *priv; struct net_device *ndev; @@ -1527,8 +1562,9 @@ static int ave_probe(struct platform_device *pdev) struct resource *res; const void *mac_addr; void __iomem *base; + const char *name; + int i, irq, ret; u64 dma_mask; - int irq, ret; u32 ave_id; data = of_device_get_match_data(dev); @@ -1541,12 +1577,6 @@ static int ave_probe(struct platform_device *pdev) dev_err(dev, "phy-mode not found\n"); return -EINVAL; } - if ((!phy_interface_mode_is_rgmii(phy_mode)) && - phy_mode != PHY_INTERFACE_MODE_RMII && - phy_mode != PHY_INTERFACE_MODE_MII) { - dev_err(dev, "phy-mode is invalid\n"); - return -EINVAL; - } irq = platform_get_irq(pdev, 0); if (irq < 0) { @@ -1614,15 +1644,47 @@ static int ave_probe(struct platform_device *pdev) u64_stats_init(&priv->stats_tx.syncp); u64_stats_init(&priv->stats_rx.syncp); - priv->clk = devm_clk_get(dev, NULL); - if (IS_ERR(priv->clk)) { - ret = PTR_ERR(priv->clk); - goto out_free_netdev; + for (i = 0; i < AVE_MAX_CLKS; i++) { + name = priv->data->clock_names[i]; + if (!name) + break; + priv->clk[i] = devm_clk_get(dev, name); + if (IS_ERR(priv->clk[i])) { + ret = PTR_ERR(priv->clk[i]); + goto out_free_netdev; + } + priv->nclks++; + } + + for (i = 0; i < AVE_MAX_RSTS; i++) { + name = priv->data->reset_names[i]; + if (!name) + break; + priv->rst[i] = devm_reset_control_get_shared(dev, name); + if (IS_ERR(priv->rst[i])) { + ret = PTR_ERR(priv->rst[i]); + goto out_free_netdev; + } + priv->nrsts++; } - priv->rst = devm_reset_control_get_optional_shared(dev, NULL); - if (IS_ERR(priv->rst)) { - ret = PTR_ERR(priv->rst); + ret = of_parse_phandle_with_fixed_args(np, + "socionext,syscon-phy-mode", + 1, 0, &args); + if (ret) { + netdev_err(ndev, "can't get syscon-phy-mode property\n"); + goto out_free_netdev; + } + priv->regmap = syscon_node_to_regmap(args.np); + of_node_put(args.np); + if (IS_ERR(priv->regmap)) { + netdev_err(ndev, "can't map syscon-phy-mode\n"); + ret = PTR_ERR(priv->regmap); + goto out_free_netdev; + } + ret = priv->data->get_pinmode(priv, phy_mode, args.args[0]); + if (ret) { + netdev_err(ndev, 
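/*
 * Self-contained sketch of the acquire/unwind pattern used above: resources
 * are enabled in order and, on failure, only the ones already acquired are
 * rolled back, in reverse.  The loop counters deliberately survive the loops
 * so the error labels know how far initialisation got.
 */
static int foo_power_up(struct clk **clks, int nclks,
			struct reset_control **rsts, int nrsts)
{
	int nc, nr, ret;

	for (nc = 0; nc < nclks; nc++) {
		ret = clk_prepare_enable(clks[nc]);
		if (ret)
			goto out_clk_disable;
	}

	for (nr = 0; nr < nrsts; nr++) {
		ret = reset_control_deassert(rsts[nr]);
		if (ret)
			goto out_reset_assert;
	}

	return 0;

out_reset_assert:
	while (--nr >= 0)
		reset_control_assert(rsts[nr]);
out_clk_disable:
	while (--nc >= 0)
		clk_disable_unprepare(clks[nc]);
	return ret;
}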
"invalid phy-mode setting\n"); goto out_free_netdev; } @@ -1685,24 +1747,148 @@ static int ave_remove(struct platform_device *pdev) return 0; } +static int ave_pro4_get_pinmode(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg) +{ + if (arg > 0) + return -EINVAL; + + priv->pinmode_mask = SG_ETPINMODE_RMII(0); + + switch (phy_mode) { + case PHY_INTERFACE_MODE_RMII: + priv->pinmode_val = SG_ETPINMODE_RMII(0); + break; + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_RGMII: + priv->pinmode_val = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ave_ld11_get_pinmode(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg) +{ + if (arg > 0) + return -EINVAL; + + priv->pinmode_mask = SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0); + + switch (phy_mode) { + case PHY_INTERFACE_MODE_INTERNAL: + priv->pinmode_val = 0; + break; + case PHY_INTERFACE_MODE_RMII: + priv->pinmode_val = SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ave_ld20_get_pinmode(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg) +{ + if (arg > 0) + return -EINVAL; + + priv->pinmode_mask = SG_ETPINMODE_RMII(0); + + switch (phy_mode) { + case PHY_INTERFACE_MODE_RMII: + priv->pinmode_val = SG_ETPINMODE_RMII(0); + break; + case PHY_INTERFACE_MODE_RGMII: + priv->pinmode_val = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ave_pxs3_get_pinmode(struct ave_private *priv, + phy_interface_t phy_mode, u32 arg) +{ + if (arg > 1) + return -EINVAL; + + priv->pinmode_mask = SG_ETPINMODE_RMII(arg); + + switch (phy_mode) { + case PHY_INTERFACE_MODE_RMII: + priv->pinmode_val = SG_ETPINMODE_RMII(arg); + break; + case PHY_INTERFACE_MODE_RGMII: + priv->pinmode_val = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + static const struct ave_soc_data ave_pro4_data = { .is_desc_64bit = false, + .clock_names = { + "gio", "ether", "ether-gb", "ether-phy", + }, + .reset_names = { + "gio", "ether", + }, + .get_pinmode = ave_pro4_get_pinmode, }; static const struct ave_soc_data ave_pxs2_data = { .is_desc_64bit = false, + .clock_names = { + "ether", + }, + .reset_names = { + "ether", + }, + .get_pinmode = ave_pro4_get_pinmode, }; static const struct ave_soc_data ave_ld11_data = { .is_desc_64bit = false, + .clock_names = { + "ether", + }, + .reset_names = { + "ether", + }, + .get_pinmode = ave_ld11_get_pinmode, }; static const struct ave_soc_data ave_ld20_data = { .is_desc_64bit = true, + .clock_names = { + "ether", + }, + .reset_names = { + "ether", + }, + .get_pinmode = ave_ld20_get_pinmode, }; static const struct ave_soc_data ave_pxs3_data = { .is_desc_64bit = false, + .clock_names = { + "ether", + }, + .reset_names = { + "ether", + }, + .get_pinmode = ave_pxs3_get_pinmode, }; static const struct of_device_id of_ave_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 972e4ef6d414..e3b578b4f7cb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -4,7 +4,8 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \ dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \ mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o \ - dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o $(stmmac-y) + dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \ + $(stmmac-y) # Ordering matters. 
Generic driver must be last. obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c index e93c40b4631e..b9c9003060c5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c @@ -24,7 +24,7 @@ #include "stmmac.h" -static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) +static int jumbo_frm(void *p, struct sk_buff *skb, int csum) { struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p; unsigned int nopaged_len = skb_headlen(skb); @@ -51,8 +51,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].buf = des2; tx_q->tx_skbuff_dma[entry].len = bmax; /* do not close the descriptor and do not set own bit */ - priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, STMMAC_CHAIN_MODE, - 0, false, skb->len); + stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, STMMAC_CHAIN_MODE, + 0, false, skb->len); while (len != 0) { tx_q->tx_skbuff[entry] = NULL; @@ -68,9 +68,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) return -1; tx_q->tx_skbuff_dma[entry].buf = des2; tx_q->tx_skbuff_dma[entry].len = bmax; - priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum, - STMMAC_CHAIN_MODE, 1, - false, skb->len); + stmmac_prepare_tx_desc(priv, desc, 0, bmax, csum, + STMMAC_CHAIN_MODE, 1, false, skb->len); len -= bmax; i++; } else { @@ -83,9 +82,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].buf = des2; tx_q->tx_skbuff_dma[entry].len = len; /* last descriptor can be set now */ - priv->hw->desc->prepare_tx_desc(desc, 0, len, csum, - STMMAC_CHAIN_MODE, 1, - true, skb->len); + stmmac_prepare_tx_desc(priv, desc, 0, len, csum, + STMMAC_CHAIN_MODE, 1, true, skb->len); len = 0; } } @@ -95,7 +93,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) return entry; } -static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc) +static unsigned int is_jumbo_frm(int len, int enh_desc) { unsigned int ret = 0; @@ -107,7 +105,7 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc) return ret; } -static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr, +static void init_dma_chain(void *des, dma_addr_t phy_addr, unsigned int size, unsigned int extend_desc) { /* @@ -137,7 +135,7 @@ static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr, } } -static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) +static void refill_desc3(void *priv_ptr, struct dma_desc *p) { struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)priv_ptr; struct stmmac_priv *priv = rx_q->priv_data; @@ -153,7 +151,7 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) sizeof(struct dma_desc))); } -static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) +static void clean_desc3(void *priv_ptr, struct dma_desc *p) { struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr; struct stmmac_priv *priv = tx_q->priv_data; @@ -171,9 +169,9 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) } const struct stmmac_mode_ops chain_mode_ops = { - .init = stmmac_init_dma_chain, - .is_jumbo_frm = stmmac_is_jumbo_frm, - .jumbo_frm = stmmac_jumbo_frm, - .refill_desc3 = stmmac_refill_desc3, - .clean_desc3 = stmmac_clean_desc3, + .init = init_dma_chain, + .is_jumbo_frm = is_jumbo_frm, + .jumbo_frm = jumbo_frm, + .refill_desc3 = refill_desc3, + .clean_desc3 = 
clean_desc3, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index ad2388aee463..627e905b6d76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -32,12 +32,14 @@ #endif #include "descs.h" +#include "hwif.h" #include "mmc.h" /* Synopsys Core versions */ #define DWMAC_CORE_3_40 0x34 #define DWMAC_CORE_3_50 0x35 #define DWMAC_CORE_4_00 0x40 +#define DWMAC_CORE_4_10 0x41 #define DWMAC_CORE_5_00 0x50 #define DWMAC_CORE_5_10 0x51 #define STMMAC_CHAN0 0 /* Always supported and default for all chips */ @@ -377,197 +379,11 @@ struct dma_features { #define JUMBO_LEN 9000 -/* Descriptors helpers */ -struct stmmac_desc_ops { - /* DMA RX descriptor ring initialization */ - void (*init_rx_desc) (struct dma_desc *p, int disable_rx_ic, int mode, - int end); - /* DMA TX descriptor ring initialization */ - void (*init_tx_desc) (struct dma_desc *p, int mode, int end); - - /* Invoked by the xmit function to prepare the tx descriptor */ - void (*prepare_tx_desc) (struct dma_desc *p, int is_fs, int len, - bool csum_flag, int mode, bool tx_own, - bool ls, unsigned int tot_pkt_len); - void (*prepare_tso_tx_desc)(struct dma_desc *p, int is_fs, int len1, - int len2, bool tx_own, bool ls, - unsigned int tcphdrlen, - unsigned int tcppayloadlen); - /* Set/get the owner of the descriptor */ - void (*set_tx_owner) (struct dma_desc *p); - int (*get_tx_owner) (struct dma_desc *p); - /* Clean the tx descriptor as soon as the tx irq is received */ - void (*release_tx_desc) (struct dma_desc *p, int mode); - /* Clear interrupt on tx frame completion. When this bit is - * set an interrupt happens as soon as the frame is transmitted */ - void (*set_tx_ic)(struct dma_desc *p); - /* Last tx segment reports the transmit status */ - int (*get_tx_ls) (struct dma_desc *p); - /* Return the transmit status looking at the TDES1 */ - int (*tx_status) (void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, void __iomem *ioaddr); - /* Get the buffer size from the descriptor */ - int (*get_tx_len) (struct dma_desc *p); - /* Handle extra events on specific interrupts hw dependent */ - void (*set_rx_owner) (struct dma_desc *p); - /* Get the receive frame size */ - int (*get_rx_frame_len) (struct dma_desc *p, int rx_coe_type); - /* Return the reception status looking at the RDES1 */ - int (*rx_status) (void *data, struct stmmac_extra_stats *x, - struct dma_desc *p); - void (*rx_extended_status) (void *data, struct stmmac_extra_stats *x, - struct dma_extended_desc *p); - /* Set tx timestamp enable bit */ - void (*enable_tx_timestamp) (struct dma_desc *p); - /* get tx timestamp status */ - int (*get_tx_timestamp_status) (struct dma_desc *p); - /* get timestamp value */ - u64(*get_timestamp) (void *desc, u32 ats); - /* get rx timestamp status */ - int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats); - /* Display ring */ - void (*display_ring)(void *head, unsigned int size, bool rx); - /* set MSS via context descriptor */ - void (*set_mss)(struct dma_desc *p, unsigned int mss); -}; - extern const struct stmmac_desc_ops enh_desc_ops; extern const struct stmmac_desc_ops ndesc_ops; -/* Specific DMA helpers */ -struct stmmac_dma_ops { - /* DMA core initialization */ - int (*reset)(void __iomem *ioaddr); - void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx, u32 dma_rx, int atds); - void (*init_chan)(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, u32 chan); - void 
(*init_rx_chan)(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan); - void (*init_tx_chan)(void __iomem *ioaddr, - struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan); - /* Configure the AXI Bus Mode Register */ - void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi); - /* Dump DMA registers */ - void (*dump_regs)(void __iomem *ioaddr, u32 *reg_space); - /* Set tx/rx threshold in the csr6 register - * An invalid value enables the store-and-forward mode */ - void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode, - int rxfifosz); - void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel, - int fifosz, u8 qmode); - void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel, - int fifosz, u8 qmode); - /* To track extra statistic (if supported) */ - void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, - void __iomem *ioaddr); - void (*enable_dma_transmission) (void __iomem *ioaddr); - void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan); - void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan); - void (*start_tx)(void __iomem *ioaddr, u32 chan); - void (*stop_tx)(void __iomem *ioaddr, u32 chan); - void (*start_rx)(void __iomem *ioaddr, u32 chan); - void (*stop_rx)(void __iomem *ioaddr, u32 chan); - int (*dma_interrupt) (void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan); - /* If supported then get the optional core features */ - void (*get_hw_feature)(void __iomem *ioaddr, - struct dma_features *dma_cap); - /* Program the HW RX Watchdog */ - void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan); - void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan); - void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan); - void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan); - void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan); - void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan); -}; - struct mac_device_info; -/* Helpers to program the MAC core */ -struct stmmac_ops { - /* MAC core initialization */ - void (*core_init)(struct mac_device_info *hw, struct net_device *dev); - /* Enable the MAC RX/TX */ - void (*set_mac)(void __iomem *ioaddr, bool enable); - /* Enable and verify that the IPC module is supported */ - int (*rx_ipc)(struct mac_device_info *hw); - /* Enable RX Queues */ - void (*rx_queue_enable)(struct mac_device_info *hw, u8 mode, u32 queue); - /* RX Queues Priority */ - void (*rx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue); - /* TX Queues Priority */ - void (*tx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue); - /* RX Queues Routing */ - void (*rx_queue_routing)(struct mac_device_info *hw, u8 packet, - u32 queue); - /* Program RX Algorithms */ - void (*prog_mtl_rx_algorithms)(struct mac_device_info *hw, u32 rx_alg); - /* Program TX Algorithms */ - void (*prog_mtl_tx_algorithms)(struct mac_device_info *hw, u32 tx_alg); - /* Set MTL TX queues weight */ - void (*set_mtl_tx_queue_weight)(struct mac_device_info *hw, - u32 weight, u32 queue); - /* RX MTL queue to RX dma mapping */ - void (*map_mtl_to_dma)(struct mac_device_info *hw, u32 queue, u32 chan); - /* Configure AV Algorithm */ - void (*config_cbs)(struct mac_device_info *hw, u32 send_slope, - u32 idle_slope, u32 high_credit, u32 low_credit, - u32 queue); - /* Dump MAC registers */ - void (*dump_regs)(struct mac_device_info *hw, u32 *reg_space); - /* Handle extra events on specific interrupts hw dependent */ - int 
(*host_irq_status)(struct mac_device_info *hw, - struct stmmac_extra_stats *x); - /* Handle MTL interrupts */ - int (*host_mtl_irq_status)(struct mac_device_info *hw, u32 chan); - /* Multicast filter setting */ - void (*set_filter)(struct mac_device_info *hw, struct net_device *dev); - /* Flow control setting */ - void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex, - unsigned int fc, unsigned int pause_time, u32 tx_cnt); - /* Set power management mode (e.g. magic frame) */ - void (*pmt)(struct mac_device_info *hw, unsigned long mode); - /* Set/Get Unicast MAC addresses */ - void (*set_umac_addr)(struct mac_device_info *hw, unsigned char *addr, - unsigned int reg_n); - void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr, - unsigned int reg_n); - void (*set_eee_mode)(struct mac_device_info *hw, - bool en_tx_lpi_clockgating); - void (*reset_eee_mode)(struct mac_device_info *hw); - void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw); - void (*set_eee_pls)(struct mac_device_info *hw, int link); - void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x, - u32 rx_queues, u32 tx_queues); - /* PCS calls */ - void (*pcs_ctrl_ane)(void __iomem *ioaddr, bool ane, bool srgmi_ral, - bool loopback); - void (*pcs_rane)(void __iomem *ioaddr, bool restart); - void (*pcs_get_adv_lp)(void __iomem *ioaddr, struct rgmii_adv *adv); - /* Safety Features */ - int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp); - bool (*safety_feat_irq_status)(struct net_device *ndev, - void __iomem *ioaddr, unsigned int asp, - struct stmmac_safety_stats *stats); - const char *(*safety_feat_dump)(struct stmmac_safety_stats *stats, - int index, unsigned long *count); -}; - -/* PTP and HW Timer helpers */ -struct stmmac_hwtimestamp { - void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data); - u32 (*config_sub_second_increment)(void __iomem *ioaddr, u32 ptp_clock, - int gmac4); - int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec); - int (*config_addend) (void __iomem *ioaddr, u32 addend); - int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec, - int add_sub, int gmac4); - u64(*get_systime) (void __iomem *ioaddr); -}; - extern const struct stmmac_hwtimestamp stmmac_ptp; extern const struct stmmac_mode_ops dwmac4_ring_mode_ops; @@ -590,18 +406,6 @@ struct mii_regs { unsigned int clk_csr_mask; }; -/* Helpers to manage the descriptors for chain and ring modes */ -struct stmmac_mode_ops { - void (*init) (void *des, dma_addr_t phy_addr, unsigned int size, - unsigned int extend_desc); - unsigned int (*is_jumbo_frm) (int len, int ehn_desc); - int (*jumbo_frm)(void *priv, struct sk_buff *skb, int csum); - int (*set_16kib_bfsize)(int mtu); - void (*init_desc3)(struct dma_desc *p); - void (*refill_desc3) (void *priv, struct dma_desc *p); - void (*clean_desc3) (void *priv, struct dma_desc *p); -}; - struct mac_device_info { const struct stmmac_ops *mac; const struct stmmac_desc_ops *desc; @@ -625,12 +429,9 @@ struct stmmac_rx_routing { u32 reg_shift; }; -struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, - int perfect_uc_entries, - int *synopsys_id); -struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id); -struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins, - int perfect_uc_entries, int *synopsys_id); +int dwmac100_setup(struct stmmac_priv *priv); +int dwmac1000_setup(struct stmmac_priv *priv); +int dwmac4_setup(struct stmmac_priv *priv); void stmmac_set_mac_addr(void __iomem *ioaddr, 
u8 addr[6], unsigned int high, unsigned int low); @@ -650,24 +451,4 @@ extern const struct stmmac_mode_ops ring_mode_ops; extern const struct stmmac_mode_ops chain_mode_ops; extern const struct stmmac_desc_ops dwmac4_desc_ops; -/** - * stmmac_get_synopsys_id - return the SYINID. - * @priv: driver private structure - * Description: this simple function is to decode and return the SYINID - * starting from the HW core register. - */ -static inline u32 stmmac_get_synopsys_id(u32 hwid) -{ - /* Check Synopsys Id (not available on old chips) */ - if (likely(hwid)) { - u32 uid = ((hwid & 0x0000ff00) >> 8); - u32 synid = (hwid & 0x000000ff); - - pr_info("stmmac - user ID: 0x%x, Synopsys ID: 0x%x\n", - uid, synid); - - return synid; - } - return 0; -} #endif /* __COMMON_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 7cb794094a70..4ff231df7322 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -18,6 +18,7 @@ #include <linux/io.h> #include <linux/ioport.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/of_net.h> #include <linux/mfd/syscon.h> #include <linux/platform_device.h> @@ -29,6 +30,10 @@ #define PRG_ETH0_RGMII_MODE BIT(0) +#define PRG_ETH0_EXT_PHY_MODE_MASK GENMASK(2, 0) +#define PRG_ETH0_EXT_RGMII_MODE 1 +#define PRG_ETH0_EXT_RMII_MODE 4 + /* mux to choose between fclk_div2 (bit unset) and mpll2 (bit set) */ #define PRG_ETH0_CLK_M250_SEL_SHIFT 4 #define PRG_ETH0_CLK_M250_SEL_MASK GENMASK(4, 4) @@ -47,12 +52,20 @@ #define MUX_CLK_NUM_PARENTS 2 +struct meson8b_dwmac; + +struct meson8b_dwmac_data { + int (*set_phy_mode)(struct meson8b_dwmac *dwmac); +}; + struct meson8b_dwmac { - struct device *dev; - void __iomem *regs; - phy_interface_t phy_mode; - struct clk *rgmii_tx_clk; - u32 tx_delay_ns; + struct device *dev; + void __iomem *regs; + + const struct meson8b_dwmac_data *data; + phy_interface_t phy_mode; + struct clk *rgmii_tx_clk; + u32 tx_delay_ns; }; struct meson8b_dwmac_clk_configs { @@ -171,6 +184,59 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) return 0; } +static int meson8b_set_phy_mode(struct meson8b_dwmac *dwmac) +{ + switch (dwmac->phy_mode) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_TXID: + /* enable RGMII mode */ + meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, + PRG_ETH0_RGMII_MODE, + PRG_ETH0_RGMII_MODE); + break; + case PHY_INTERFACE_MODE_RMII: + /* disable RGMII mode -> enables RMII mode */ + meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, + PRG_ETH0_RGMII_MODE, 0); + break; + default: + dev_err(dwmac->dev, "fail to set phy-mode %s\n", + phy_modes(dwmac->phy_mode)); + return -EINVAL; + } + + return 0; +} + +static int meson_axg_set_phy_mode(struct meson8b_dwmac *dwmac) +{ + switch (dwmac->phy_mode) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_TXID: + /* enable RGMII mode */ + meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, + PRG_ETH0_EXT_PHY_MODE_MASK, + PRG_ETH0_EXT_RGMII_MODE); + break; + case PHY_INTERFACE_MODE_RMII: + /* disable RGMII mode -> enables RMII mode */ + meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, + PRG_ETH0_EXT_PHY_MODE_MASK, + PRG_ETH0_EXT_RMII_MODE); + break; + default: + dev_err(dwmac->dev, "fail to set phy-mode %s\n", + phy_modes(dwmac->phy_mode)); + return -EINVAL; + } + + return 0; 
+} + static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) { int ret; @@ -188,10 +254,6 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_TXID: - /* enable RGMII mode */ - meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_RGMII_MODE, - PRG_ETH0_RGMII_MODE); - /* only relevant for RMII mode -> disable in RGMII mode */ meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_INVERTED_RMII_CLK, 0); @@ -224,10 +286,6 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) break; case PHY_INTERFACE_MODE_RMII: - /* disable RGMII mode -> enables RMII mode */ - meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_RGMII_MODE, - 0); - /* invert internal clk_rmii_i to generate 25/2.5 tx_rx_clk */ meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_INVERTED_RMII_CLK, @@ -274,6 +332,11 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } + dwmac->data = (const struct meson8b_dwmac_data *) + of_device_get_match_data(&pdev->dev); + if (!dwmac->data) + return -EINVAL; + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); dwmac->regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(dwmac->regs)) { @@ -298,6 +361,10 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) if (ret) goto err_remove_config_dt; + ret = dwmac->data->set_phy_mode(dwmac); + if (ret) + goto err_remove_config_dt; + ret = meson8b_init_prg_eth(dwmac); if (ret) goto err_remove_config_dt; @@ -316,10 +383,31 @@ err_remove_config_dt: return ret; } +static const struct meson8b_dwmac_data meson8b_dwmac_data = { + .set_phy_mode = meson8b_set_phy_mode, +}; + +static const struct meson8b_dwmac_data meson_axg_dwmac_data = { + .set_phy_mode = meson_axg_set_phy_mode, +}; + static const struct of_device_id meson8b_dwmac_match[] = { - { .compatible = "amlogic,meson8b-dwmac" }, - { .compatible = "amlogic,meson8m2-dwmac" }, - { .compatible = "amlogic,meson-gxbb-dwmac" }, + { + .compatible = "amlogic,meson8b-dwmac", + .data = &meson8b_dwmac_data, + }, + { + .compatible = "amlogic,meson8m2-dwmac", + .data = &meson8b_dwmac_data, + }, + { + .compatible = "amlogic,meson-gxbb-dwmac", + .data = &meson8b_dwmac_data, + }, + { + .compatible = "amlogic,meson-axg-dwmac", + .data = &meson_axg_dwmac_data, + }, { } }; MODULE_DEVICE_TABLE(of, meson8b_dwmac_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index c02d36629c52..184ca13c8f79 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -29,7 +29,6 @@ #define GMAC_MII_DATA 0x00000014 /* MII Data */ #define GMAC_FLOW_CTRL 0x00000018 /* Flow Control */ #define GMAC_VLAN_TAG 0x0000001c /* VLAN Tag */ -#define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ #define GMAC_DEBUG 0x00000024 /* GMAC debug register */ #define GMAC_WAKEUP_FILTER 0x00000028 /* Wake-up Frame Filter */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index ef10baf14186..0877bde6e860 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -27,6 +27,7 @@ #include <linux/ethtool.h> #include <net/dsa.h> #include <asm/io.h> +#include "stmmac.h" #include "stmmac_pcs.h" #include "dwmac1000.h" @@ -498,7 +499,7 @@ static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x, x->mac_gmii_rx_proto_engine++; } -static const struct 
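/*
 * Sketch of the match-data dispatch above: each compatible entry in
 * meson8b_dwmac_match carries a meson8b_dwmac_data pointer, probe resolves
 * it once, and all SoC-specific PRG_ETH0 programming goes through the
 * set_phy_mode() hook.  Hypothetical probe fragment:
 */
static int foo_probe_fragment(struct platform_device *pdev,
			      struct meson8b_dwmac *dwmac)
{
	dwmac->data = of_device_get_match_data(&pdev->dev);
	if (!dwmac->data)
		return -EINVAL;		/* bound without a match entry */

	return dwmac->data->set_phy_mode(dwmac);
}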
stmmac_ops dwmac1000_ops = { +const struct stmmac_ops dwmac1000_ops = { .core_init = dwmac1000_core_init, .set_mac = stmmac_set_mac, .rx_ipc = dwmac1000_rx_ipc_enable, @@ -519,28 +520,21 @@ static const struct stmmac_ops dwmac1000_ops = { .pcs_get_adv_lp = dwmac1000_get_adv_lp, }; -struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, - int perfect_uc_entries, - int *synopsys_id) +int dwmac1000_setup(struct stmmac_priv *priv) { - struct mac_device_info *mac; - u32 hwid = readl(ioaddr + GMAC_VERSION); + struct mac_device_info *mac = priv->hw; - mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL); - if (!mac) - return NULL; + dev_info(priv->device, "\tDWMAC1000\n"); - mac->pcsr = ioaddr; - mac->multicast_filter_bins = mcbins; - mac->unicast_filter_entries = perfect_uc_entries; + priv->dev->priv_flags |= IFF_UNICAST_FLT; + mac->pcsr = priv->ioaddr; + mac->multicast_filter_bins = priv->plat->multicast_filter_bins; + mac->unicast_filter_entries = priv->plat->unicast_filter_entries; mac->mcast_bits_log2 = 0; if (mac->multicast_filter_bins) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); - mac->mac = &dwmac1000_ops; - mac->dma = &dwmac1000_dma_ops; - mac->link.duplex = GMAC_CONTROL_DM; mac->link.speed10 = GMAC_CONTROL_PS; mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES; @@ -555,8 +549,5 @@ struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, mac->mii.clk_csr_shift = 2; mac->mii.clk_csr_mask = GENMASK(5, 2); - /* Get and dump the chip ID */ - *synopsys_id = stmmac_get_synopsys_id(hwid); - - return mac; + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 91b23f9db31a..b735143987e1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -27,6 +27,7 @@ #include <linux/crc32.h> #include <net/dsa.h> #include <asm/io.h> +#include "stmmac.h" #include "dwmac100.h" static void dwmac100_core_init(struct mac_device_info *hw, @@ -159,7 +160,7 @@ static void dwmac100_pmt(struct mac_device_info *hw, unsigned long mode) return; } -static const struct stmmac_ops dwmac100_ops = { +const struct stmmac_ops dwmac100_ops = { .core_init = dwmac100_core_init, .set_mac = stmmac_set_mac, .rx_ipc = dwmac100_rx_ipc_enable, @@ -172,20 +173,13 @@ static const struct stmmac_ops dwmac100_ops = { .get_umac_addr = dwmac100_get_umac_addr, }; -struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id) +int dwmac100_setup(struct stmmac_priv *priv) { - struct mac_device_info *mac; + struct mac_device_info *mac = priv->hw; - mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL); - if (!mac) - return NULL; - - pr_info("\tDWMAC100\n"); - - mac->pcsr = ioaddr; - mac->mac = &dwmac100_ops; - mac->dma = &dwmac100_dma_ops; + dev_info(priv->device, "\tDWMAC100\n"); + mac->pcsr = priv->ioaddr; mac->link.duplex = MAC_CONTROL_F; mac->link.speed10 = 0; mac->link.speed100 = 0; @@ -200,8 +194,5 @@ struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id) mac->mii.clk_csr_shift = 2; mac->mii.clk_csr_mask = GENMASK(5, 2); - /* Synopsys Id is not available on old chips */ - *synopsys_id = 0; - - return mac; + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index dedd40613090..03eab9077c1c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -34,7 +34,6 
@@ #define GMAC_PCS_BASE 0x000000e0 #define GMAC_PHYIF_CONTROL_STATUS 0x000000f8 #define GMAC_PMT 0x000000c0 -#define GMAC_VERSION 0x00000110 #define GMAC_DEBUG 0x00000114 #define GMAC_HW_FEATURE0 0x0000011c #define GMAC_HW_FEATURE1 0x00000120 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 517b1f6736a8..7289b3b47d8e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -18,6 +18,7 @@ #include <linux/ethtool.h> #include <linux/io.h> #include <net/dsa.h> +#include "stmmac.h" #include "stmmac_pcs.h" #include "dwmac4.h" #include "dwmac5.h" @@ -700,7 +701,7 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x, x->mac_gmii_rx_proto_engine++; } -static const struct stmmac_ops dwmac4_ops = { +const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, @@ -731,7 +732,7 @@ static const struct stmmac_ops dwmac4_ops = { .set_filter = dwmac4_set_filter, }; -static const struct stmmac_ops dwmac410_ops = { +const struct stmmac_ops dwmac410_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_dwmac4_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, @@ -762,7 +763,7 @@ static const struct stmmac_ops dwmac410_ops = { .set_filter = dwmac4_set_filter, }; -static const struct stmmac_ops dwmac510_ops = { +const struct stmmac_ops dwmac510_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_dwmac4_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, @@ -796,19 +797,16 @@ static const struct stmmac_ops dwmac510_ops = { .safety_feat_dump = dwmac5_safety_feat_dump, }; -struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins, - int perfect_uc_entries, int *synopsys_id) +int dwmac4_setup(struct stmmac_priv *priv) { - struct mac_device_info *mac; - u32 hwid = readl(ioaddr + GMAC_VERSION); + struct mac_device_info *mac = priv->hw; - mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL); - if (!mac) - return NULL; + dev_info(priv->device, "\tDWMAC4/5\n"); - mac->pcsr = ioaddr; - mac->multicast_filter_bins = mcbins; - mac->unicast_filter_entries = perfect_uc_entries; + priv->dev->priv_flags |= IFF_UNICAST_FLT; + mac->pcsr = priv->ioaddr; + mac->multicast_filter_bins = priv->plat->multicast_filter_bins; + mac->unicast_filter_entries = priv->plat->unicast_filter_entries; mac->mcast_bits_log2 = 0; if (mac->multicast_filter_bins) @@ -828,20 +826,5 @@ struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins, mac->mii.clk_csr_shift = 8; mac->mii.clk_csr_mask = GENMASK(11, 8); - /* Get and dump the chip ID */ - *synopsys_id = stmmac_get_synopsys_id(hwid); - - if (*synopsys_id > DWMAC_CORE_4_00) - mac->dma = &dwmac410_dma_ops; - else - mac->dma = &dwmac4_dma_ops; - - if (*synopsys_id >= DWMAC_CORE_5_10) - mac->mac = &dwmac510_ops; - else if (*synopsys_id >= DWMAC_CORE_4_00) - mac->mac = &dwmac410_ops; - else - mac->mac = &dwmac4_ops; - - return mac; + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 2a6521d33e43..65ed896c13cb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -223,7 +223,7 @@ static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p) return 0; } -static inline u64 dwmac4_get_timestamp(void *desc, u32 ats) +static inline void dwmac4_get_timestamp(void *desc, u32 ats, u64 *ts) { struct 
dma_desc *p = (struct dma_desc *)desc; u64 ns; @@ -232,7 +232,7 @@ static inline u64 dwmac4_get_timestamp(void *desc, u32 ats) /* convert high/sec time stamp value to nanosecond */ ns += le32_to_cpu(p->des1) * 1000000000ULL; - return ns; + *ts = ns; } static int dwmac4_rx_check_timestamp(void *desc) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 860de39999c7..2978550bb7f6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -237,15 +237,16 @@ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp) return 0; } -bool dwmac5_safety_feat_irq_status(struct net_device *ndev, +int dwmac5_safety_feat_irq_status(struct net_device *ndev, void __iomem *ioaddr, unsigned int asp, struct stmmac_safety_stats *stats) { - bool ret = false, err, corr; + bool err, corr; u32 mtl, dma; + int ret = 0; if (!asp) - return false; + return -EINVAL; mtl = readl(ioaddr + MTL_SAFETY_INT_STATUS); dma = readl(ioaddr + DMA_SAFETY_INT_STATUS); @@ -282,17 +283,19 @@ static const struct dwmac5_error { { dwmac5_dma_errors }, }; -const char *dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats, - int index, unsigned long *count) +int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats, + int index, unsigned long *count, const char **desc) { int module = index / 32, offset = index % 32; unsigned long *ptr = (unsigned long *)stats; if (module >= ARRAY_SIZE(dwmac5_all_errors)) - return NULL; + return -EINVAL; if (!dwmac5_all_errors[module].desc[offset].valid) - return NULL; + return -EINVAL; if (count) *count = *(ptr + index); - return dwmac5_all_errors[module].desc[offset].desc; + if (desc) + *desc = dwmac5_all_errors[module].desc[offset].desc; + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index a0d2c44711b9..bd4c466d303e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -43,10 +43,10 @@ #define DMA_ECC_INT_STATUS 0x00001088 int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp); -bool dwmac5_safety_feat_irq_status(struct net_device *ndev, +int dwmac5_safety_feat_irq_status(struct net_device *ndev, void __iomem *ioaddr, unsigned int asp, struct stmmac_safety_stats *stats); -const char *dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats, - int index, unsigned long *count); +int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats, + int index, unsigned long *count, const char **desc); #endif /* __DWMAC5_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 6768a25b6aa0..3bfb3f584be2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -382,7 +382,7 @@ static int enh_desc_get_tx_timestamp_status(struct dma_desc *p) return (le32_to_cpu(p->des0) & ETDES0_TIME_STAMP_STATUS) >> 17; } -static u64 enh_desc_get_timestamp(void *desc, u32 ats) +static void enh_desc_get_timestamp(void *desc, u32 ats, u64 *ts) { u64 ns; @@ -397,7 +397,7 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats) ns += le32_to_cpu(p->des3) * 1000000000ULL; } - return ns; + *ts = ns; } static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c new file mode 100644 index 000000000000..2b0a7e79de00 --- /dev/null +++ 
b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* + * Copyright (c) 2018 Synopsys, Inc. and/or its affiliates. + * stmmac HW Interface Handling + */ + +#include "common.h" +#include "stmmac.h" + +static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg) +{ + u32 reg = readl(priv->ioaddr + id_reg); + + if (!reg) { + dev_info(priv->device, "Version ID not available\n"); + return 0x0; + } + + dev_info(priv->device, "User ID: 0x%x, Synopsys ID: 0x%x\n", + (unsigned int)(reg & GENMASK(15, 8)) >> 8, + (unsigned int)(reg & GENMASK(7, 0))); + return reg & GENMASK(7, 0); +} + +static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) +{ + struct mac_device_info *mac = priv->hw; + + if (priv->chain_mode) { + dev_info(priv->device, "Chain mode enabled\n"); + priv->mode = STMMAC_CHAIN_MODE; + mac->mode = &chain_mode_ops; + } else { + dev_info(priv->device, "Ring mode enabled\n"); + priv->mode = STMMAC_RING_MODE; + mac->mode = &ring_mode_ops; + } +} + +static int stmmac_dwmac1_quirks(struct stmmac_priv *priv) +{ + struct mac_device_info *mac = priv->hw; + + if (priv->plat->enh_desc) { + dev_info(priv->device, "Enhanced/Alternate descriptors\n"); + + /* GMAC older than 3.50 has no extended descriptors */ + if (priv->synopsys_id >= DWMAC_CORE_3_50) { + dev_info(priv->device, "Enabled extended descriptors\n"); + priv->extend_desc = 1; + } else { + dev_warn(priv->device, "Extended descriptors not supported\n"); + } + + mac->desc = &enh_desc_ops; + } else { + dev_info(priv->device, "Normal descriptors\n"); + mac->desc = &ndesc_ops; + } + + stmmac_dwmac_mode_quirk(priv); + return 0; +} + +static int stmmac_dwmac4_quirks(struct stmmac_priv *priv) +{ + stmmac_dwmac_mode_quirk(priv); + return 0; +} + +static const struct stmmac_hwif_entry { + bool gmac; + bool gmac4; + u32 min_id; + const void *desc; + const void *dma; + const void *mac; + const void *hwtimestamp; + const void *mode; + int (*setup)(struct stmmac_priv *priv); + int (*quirks)(struct stmmac_priv *priv); +} stmmac_hw[] = { + /* NOTE: New HW versions shall go to the end of this table */ + { + .gmac = false, + .gmac4 = false, + .min_id = 0, + .desc = NULL, + .dma = &dwmac100_dma_ops, + .mac = &dwmac100_ops, + .hwtimestamp = &stmmac_ptp, + .mode = NULL, + .setup = dwmac100_setup, + .quirks = stmmac_dwmac1_quirks, + }, { + .gmac = true, + .gmac4 = false, + .min_id = 0, + .desc = NULL, + .dma = &dwmac1000_dma_ops, + .mac = &dwmac1000_ops, + .hwtimestamp = &stmmac_ptp, + .mode = NULL, + .setup = dwmac1000_setup, + .quirks = stmmac_dwmac1_quirks, + }, { + .gmac = false, + .gmac4 = true, + .min_id = 0, + .desc = &dwmac4_desc_ops, + .dma = &dwmac4_dma_ops, + .mac = &dwmac4_ops, + .hwtimestamp = &stmmac_ptp, + .mode = NULL, + .setup = dwmac4_setup, + .quirks = stmmac_dwmac4_quirks, + }, { + .gmac = false, + .gmac4 = true, + .min_id = DWMAC_CORE_4_00, + .desc = &dwmac4_desc_ops, + .dma = &dwmac4_dma_ops, + .mac = &dwmac410_ops, + .hwtimestamp = &stmmac_ptp, + .mode = &dwmac4_ring_mode_ops, + .setup = dwmac4_setup, + .quirks = NULL, + }, { + .gmac = false, + .gmac4 = true, + .min_id = DWMAC_CORE_4_10, + .desc = &dwmac4_desc_ops, + .dma = &dwmac410_dma_ops, + .mac = &dwmac410_ops, + .hwtimestamp = &stmmac_ptp, + .mode = &dwmac4_ring_mode_ops, + .setup = dwmac4_setup, + .quirks = NULL, + }, { + .gmac = false, + .gmac4 = true, + .min_id = DWMAC_CORE_5_10, + .desc = &dwmac4_desc_ops, + .dma = &dwmac410_dma_ops, + .mac = &dwmac510_ops, + .hwtimestamp = &stmmac_ptp, + .mode = 
&dwmac4_ring_mode_ops, + .setup = dwmac4_setup, + .quirks = NULL, + } +}; + +int stmmac_hwif_init(struct stmmac_priv *priv) +{ + bool needs_gmac4 = priv->plat->has_gmac4; + bool needs_gmac = priv->plat->has_gmac; + const struct stmmac_hwif_entry *entry; + struct mac_device_info *mac; + int i, ret; + u32 id; + + if (needs_gmac) { + id = stmmac_get_id(priv, GMAC_VERSION); + } else { + id = stmmac_get_id(priv, GMAC4_VERSION); + } + + /* Save ID for later use */ + priv->synopsys_id = id; + + /* Check for HW specific setup first */ + if (priv->plat->setup) { + priv->hw = priv->plat->setup(priv); + if (!priv->hw) + return -ENOMEM; + return 0; + } + + mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL); + if (!mac) + return -ENOMEM; + + /* Fallback to generic HW */ + for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) { + entry = &stmmac_hw[i]; + + if (needs_gmac ^ entry->gmac) + continue; + if (needs_gmac4 ^ entry->gmac4) + continue; + if (id < entry->min_id) + continue; + + mac->desc = entry->desc; + mac->dma = entry->dma; + mac->mac = entry->mac; + mac->ptp = entry->hwtimestamp; + mac->mode = entry->mode; + + priv->hw = mac; + + /* Entry found */ + ret = entry->setup(priv); + if (ret) + return ret; + + /* Run quirks, if needed */ + if (entry->quirks) { + ret = entry->quirks(priv); + if (ret) + return ret; + } + + return 0; + } + + dev_err(priv->device, "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n", + id, needs_gmac, needs_gmac4); + return -EINVAL; +} diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h new file mode 100644 index 000000000000..bfad61607f07 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +// Copyright (c) 2018 Synopsys, Inc. and/or its affiliates. +// stmmac HW Interface Callbacks + +#ifndef __STMMAC_HWIF_H__ +#define __STMMAC_HWIF_H__ + +#define stmmac_do_void_callback(__priv, __module, __cname, __arg0, __args...) \ +({ \ + int __result = -EINVAL; \ + if ((__priv)->hw->__module->__cname) { \ + (__priv)->hw->__module->__cname((__arg0), ##__args); \ + __result = 0; \ + } \ + __result; \ +}) +#define stmmac_do_callback(__priv, __module, __cname, __arg0, __args...) \ +({ \ + int __result = -EINVAL; \ + if ((__priv)->hw->__module->__cname) \ + __result = (__priv)->hw->__module->__cname((__arg0), ##__args); \ + __result; \ +}) + +struct stmmac_extra_stats; +struct stmmac_safety_stats; +struct dma_desc; +struct dma_extended_desc; + +/* Descriptors helpers */ +struct stmmac_desc_ops { + /* DMA RX descriptor ring initialization */ + void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode, + int end); + /* DMA TX descriptor ring initialization */ + void (*init_tx_desc)(struct dma_desc *p, int mode, int end); + /* Invoked by the xmit function to prepare the tx descriptor */ + void (*prepare_tx_desc)(struct dma_desc *p, int is_fs, int len, + bool csum_flag, int mode, bool tx_own, bool ls, + unsigned int tot_pkt_len); + void (*prepare_tso_tx_desc)(struct dma_desc *p, int is_fs, int len1, + int len2, bool tx_own, bool ls, unsigned int tcphdrlen, + unsigned int tcppayloadlen); + /* Set/get the owner of the descriptor */ + void (*set_tx_owner)(struct dma_desc *p); + int (*get_tx_owner)(struct dma_desc *p); + /* Clean the tx descriptor as soon as the tx irq is received */ + void (*release_tx_desc)(struct dma_desc *p, int mode); + /* Clear interrupt on tx frame completion. 
When this bit is + * set an interrupt happens as soon as the frame is transmitted */ + void (*set_tx_ic)(struct dma_desc *p); + /* Last tx segment reports the transmit status */ + int (*get_tx_ls)(struct dma_desc *p); + /* Return the transmit status looking at the TDES1 */ + int (*tx_status)(void *data, struct stmmac_extra_stats *x, + struct dma_desc *p, void __iomem *ioaddr); + /* Get the buffer size from the descriptor */ + int (*get_tx_len)(struct dma_desc *p); + /* Handle extra events on specific interrupts hw dependent */ + void (*set_rx_owner)(struct dma_desc *p); + /* Get the receive frame size */ + int (*get_rx_frame_len)(struct dma_desc *p, int rx_coe_type); + /* Return the reception status looking at the RDES1 */ + int (*rx_status)(void *data, struct stmmac_extra_stats *x, + struct dma_desc *p); + void (*rx_extended_status)(void *data, struct stmmac_extra_stats *x, + struct dma_extended_desc *p); + /* Set tx timestamp enable bit */ + void (*enable_tx_timestamp) (struct dma_desc *p); + /* get tx timestamp status */ + int (*get_tx_timestamp_status) (struct dma_desc *p); + /* get timestamp value */ + void (*get_timestamp)(void *desc, u32 ats, u64 *ts); + /* get rx timestamp status */ + int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats); + /* Display ring */ + void (*display_ring)(void *head, unsigned int size, bool rx); + /* set MSS via context descriptor */ + void (*set_mss)(struct dma_desc *p, unsigned int mss); +}; + +#define stmmac_init_rx_desc(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, init_rx_desc, __args) +#define stmmac_init_tx_desc(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, init_tx_desc, __args) +#define stmmac_prepare_tx_desc(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, prepare_tx_desc, __args) +#define stmmac_prepare_tso_tx_desc(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, prepare_tso_tx_desc, __args) +#define stmmac_set_tx_owner(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, set_tx_owner, __args) +#define stmmac_get_tx_owner(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_tx_owner, __args) +#define stmmac_release_tx_desc(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, release_tx_desc, __args) +#define stmmac_set_tx_ic(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, set_tx_ic, __args) +#define stmmac_get_tx_ls(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_tx_ls, __args) +#define stmmac_tx_status(__priv, __args...) \ + stmmac_do_callback(__priv, desc, tx_status, __args) +#define stmmac_get_tx_len(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_tx_len, __args) +#define stmmac_set_rx_owner(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, set_rx_owner, __args) +#define stmmac_get_rx_frame_len(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_rx_frame_len, __args) +#define stmmac_rx_status(__priv, __args...) \ + stmmac_do_callback(__priv, desc, rx_status, __args) +#define stmmac_rx_extended_status(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, rx_extended_status, __args) +#define stmmac_enable_tx_timestamp(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, enable_tx_timestamp, __args) +#define stmmac_get_tx_timestamp_status(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_tx_timestamp_status, __args) +#define stmmac_get_timestamp(__priv, __args...) 
\ + stmmac_do_void_callback(__priv, desc, get_timestamp, __args) +#define stmmac_get_rx_timestamp_status(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_rx_timestamp_status, __args) +#define stmmac_display_ring(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, display_ring, __args) +#define stmmac_set_mss(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, set_mss, __args) + +struct stmmac_dma_cfg; +struct dma_features; + +/* Specific DMA helpers */ +struct stmmac_dma_ops { + /* DMA core initialization */ + int (*reset)(void __iomem *ioaddr); + void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, + u32 dma_tx, u32 dma_rx, int atds); + void (*init_chan)(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, u32 chan); + void (*init_rx_chan)(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + u32 dma_rx_phy, u32 chan); + void (*init_tx_chan)(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + u32 dma_tx_phy, u32 chan); + /* Configure the AXI Bus Mode Register */ + void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi); + /* Dump DMA registers */ + void (*dump_regs)(void __iomem *ioaddr, u32 *reg_space); + /* Set tx/rx threshold in the csr6 register + * An invalid value enables the store-and-forward mode */ + void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode, + int rxfifosz); + void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel, + int fifosz, u8 qmode); + void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel, + int fifosz, u8 qmode); + /* To track extra statistic (if supported) */ + void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, + void __iomem *ioaddr); + void (*enable_dma_transmission) (void __iomem *ioaddr); + void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan); + void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan); + void (*start_tx)(void __iomem *ioaddr, u32 chan); + void (*stop_tx)(void __iomem *ioaddr, u32 chan); + void (*start_rx)(void __iomem *ioaddr, u32 chan); + void (*stop_rx)(void __iomem *ioaddr, u32 chan); + int (*dma_interrupt) (void __iomem *ioaddr, + struct stmmac_extra_stats *x, u32 chan); + /* If supported then get the optional core features */ + void (*get_hw_feature)(void __iomem *ioaddr, + struct dma_features *dma_cap); + /* Program the HW RX Watchdog */ + void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan); + void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan); + void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan); + void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan); + void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan); + void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan); +}; + +#define stmmac_reset(__priv, __args...) \ + stmmac_do_callback(__priv, dma, reset, __args) +#define stmmac_dma_init(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, init, __args) +#define stmmac_init_chan(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, init_chan, __args) +#define stmmac_init_rx_chan(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, init_rx_chan, __args) +#define stmmac_init_tx_chan(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, init_tx_chan, __args) +#define stmmac_axi(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, axi, __args) +#define stmmac_dump_dma_regs(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, dump_regs, __args) +#define stmmac_dma_mode(__priv, __args...) 
\ + stmmac_do_void_callback(__priv, dma, dma_mode, __args) +#define stmmac_dma_rx_mode(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, dma_rx_mode, __args) +#define stmmac_dma_tx_mode(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, dma_tx_mode, __args) +#define stmmac_dma_diagnostic_fr(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, dma_diagnostic_fr, __args) +#define stmmac_enable_dma_transmission(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, enable_dma_transmission, __args) +#define stmmac_enable_dma_irq(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, enable_dma_irq, __args) +#define stmmac_disable_dma_irq(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, disable_dma_irq, __args) +#define stmmac_start_tx(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, start_tx, __args) +#define stmmac_stop_tx(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, stop_tx, __args) +#define stmmac_start_rx(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, start_rx, __args) +#define stmmac_stop_rx(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, stop_rx, __args) +#define stmmac_dma_interrupt_status(__priv, __args...) \ + stmmac_do_callback(__priv, dma, dma_interrupt, __args) +#define stmmac_get_hw_feature(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, get_hw_feature, __args) +#define stmmac_rx_watchdog(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, rx_watchdog, __args) +#define stmmac_set_tx_ring_len(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, set_tx_ring_len, __args) +#define stmmac_set_rx_ring_len(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, set_rx_ring_len, __args) +#define stmmac_set_rx_tail_ptr(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, set_rx_tail_ptr, __args) +#define stmmac_set_tx_tail_ptr(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args) +#define stmmac_enable_tso(__priv, __args...) 
\ + stmmac_do_void_callback(__priv, dma, enable_tso, __args) + +struct mac_device_info; +struct net_device; +struct rgmii_adv; +struct stmmac_safety_stats; + +/* Helpers to program the MAC core */ +struct stmmac_ops { + /* MAC core initialization */ + void (*core_init)(struct mac_device_info *hw, struct net_device *dev); + /* Enable the MAC RX/TX */ + void (*set_mac)(void __iomem *ioaddr, bool enable); + /* Enable and verify that the IPC module is supported */ + int (*rx_ipc)(struct mac_device_info *hw); + /* Enable RX Queues */ + void (*rx_queue_enable)(struct mac_device_info *hw, u8 mode, u32 queue); + /* RX Queues Priority */ + void (*rx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue); + /* TX Queues Priority */ + void (*tx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue); + /* RX Queues Routing */ + void (*rx_queue_routing)(struct mac_device_info *hw, u8 packet, + u32 queue); + /* Program RX Algorithms */ + void (*prog_mtl_rx_algorithms)(struct mac_device_info *hw, u32 rx_alg); + /* Program TX Algorithms */ + void (*prog_mtl_tx_algorithms)(struct mac_device_info *hw, u32 tx_alg); + /* Set MTL TX queues weight */ + void (*set_mtl_tx_queue_weight)(struct mac_device_info *hw, + u32 weight, u32 queue); + /* RX MTL queue to RX dma mapping */ + void (*map_mtl_to_dma)(struct mac_device_info *hw, u32 queue, u32 chan); + /* Configure AV Algorithm */ + void (*config_cbs)(struct mac_device_info *hw, u32 send_slope, + u32 idle_slope, u32 high_credit, u32 low_credit, + u32 queue); + /* Dump MAC registers */ + void (*dump_regs)(struct mac_device_info *hw, u32 *reg_space); + /* Handle extra events on specific interrupts hw dependent */ + int (*host_irq_status)(struct mac_device_info *hw, + struct stmmac_extra_stats *x); + /* Handle MTL interrupts */ + int (*host_mtl_irq_status)(struct mac_device_info *hw, u32 chan); + /* Multicast filter setting */ + void (*set_filter)(struct mac_device_info *hw, struct net_device *dev); + /* Flow control setting */ + void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex, + unsigned int fc, unsigned int pause_time, u32 tx_cnt); + /* Set power management mode (e.g. magic frame) */ + void (*pmt)(struct mac_device_info *hw, unsigned long mode); + /* Set/Get Unicast MAC addresses */ + void (*set_umac_addr)(struct mac_device_info *hw, unsigned char *addr, + unsigned int reg_n); + void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr, + unsigned int reg_n); + void (*set_eee_mode)(struct mac_device_info *hw, + bool en_tx_lpi_clockgating); + void (*reset_eee_mode)(struct mac_device_info *hw); + void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw); + void (*set_eee_pls)(struct mac_device_info *hw, int link); + void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x, + u32 rx_queues, u32 tx_queues); + /* PCS calls */ + void (*pcs_ctrl_ane)(void __iomem *ioaddr, bool ane, bool srgmi_ral, + bool loopback); + void (*pcs_rane)(void __iomem *ioaddr, bool restart); + void (*pcs_get_adv_lp)(void __iomem *ioaddr, struct rgmii_adv *adv); + /* Safety Features */ + int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp); + int (*safety_feat_irq_status)(struct net_device *ndev, + void __iomem *ioaddr, unsigned int asp, + struct stmmac_safety_stats *stats); + int (*safety_feat_dump)(struct stmmac_safety_stats *stats, + int index, unsigned long *count, const char **desc); +}; + +#define stmmac_core_init(__priv, __args...) 
\ + stmmac_do_void_callback(__priv, mac, core_init, __args) +#define stmmac_mac_set(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_mac, __args) +#define stmmac_rx_ipc(__priv, __args...) \ + stmmac_do_callback(__priv, mac, rx_ipc, __args) +#define stmmac_rx_queue_enable(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, rx_queue_enable, __args) +#define stmmac_rx_queue_prio(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, rx_queue_prio, __args) +#define stmmac_tx_queue_prio(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, tx_queue_prio, __args) +#define stmmac_rx_queue_routing(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, rx_queue_routing, __args) +#define stmmac_prog_mtl_rx_algorithms(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, prog_mtl_rx_algorithms, __args) +#define stmmac_prog_mtl_tx_algorithms(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, prog_mtl_tx_algorithms, __args) +#define stmmac_set_mtl_tx_queue_weight(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_mtl_tx_queue_weight, __args) +#define stmmac_map_mtl_to_dma(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, map_mtl_to_dma, __args) +#define stmmac_config_cbs(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, config_cbs, __args) +#define stmmac_dump_mac_regs(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, dump_regs, __args) +#define stmmac_host_irq_status(__priv, __args...) \ + stmmac_do_callback(__priv, mac, host_irq_status, __args) +#define stmmac_host_mtl_irq_status(__priv, __args...) \ + stmmac_do_callback(__priv, mac, host_mtl_irq_status, __args) +#define stmmac_set_filter(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_filter, __args) +#define stmmac_flow_ctrl(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, flow_ctrl, __args) +#define stmmac_pmt(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, pmt, __args) +#define stmmac_set_umac_addr(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_umac_addr, __args) +#define stmmac_get_umac_addr(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, get_umac_addr, __args) +#define stmmac_set_eee_mode(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_eee_mode, __args) +#define stmmac_reset_eee_mode(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, reset_eee_mode, __args) +#define stmmac_set_eee_timer(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_eee_timer, __args) +#define stmmac_set_eee_pls(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_eee_pls, __args) +#define stmmac_mac_debug(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, debug, __args) +#define stmmac_pcs_ctrl_ane(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, pcs_ctrl_ane, __args) +#define stmmac_pcs_rane(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, pcs_rane, __args) +#define stmmac_pcs_get_adv_lp(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, pcs_get_adv_lp, __args) +#define stmmac_safety_feat_config(__priv, __args...) \ + stmmac_do_callback(__priv, mac, safety_feat_config, __args) +#define stmmac_safety_feat_irq_status(__priv, __args...) \ + stmmac_do_callback(__priv, mac, safety_feat_irq_status, __args) +#define stmmac_safety_feat_dump(__priv, __args...) 
\ + stmmac_do_callback(__priv, mac, safety_feat_dump, __args) + +/* PTP and HW Timer helpers */ +struct stmmac_hwtimestamp { + void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data); + void (*config_sub_second_increment)(void __iomem *ioaddr, u32 ptp_clock, + int gmac4, u32 *ssinc); + int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec); + int (*config_addend) (void __iomem *ioaddr, u32 addend); + int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec, + int add_sub, int gmac4); + void (*get_systime) (void __iomem *ioaddr, u64 *systime); +}; + +#define stmmac_config_hw_tstamping(__priv, __args...) \ + stmmac_do_void_callback(__priv, ptp, config_hw_tstamping, __args) +#define stmmac_config_sub_second_increment(__priv, __args...) \ + stmmac_do_void_callback(__priv, ptp, config_sub_second_increment, __args) +#define stmmac_init_systime(__priv, __args...) \ + stmmac_do_callback(__priv, ptp, init_systime, __args) +#define stmmac_config_addend(__priv, __args...) \ + stmmac_do_callback(__priv, ptp, config_addend, __args) +#define stmmac_adjust_systime(__priv, __args...) \ + stmmac_do_callback(__priv, ptp, adjust_systime, __args) +#define stmmac_get_systime(__priv, __args...) \ + stmmac_do_void_callback(__priv, ptp, get_systime, __args) + +/* Helpers to manage the descriptors for chain and ring modes */ +struct stmmac_mode_ops { + void (*init) (void *des, dma_addr_t phy_addr, unsigned int size, + unsigned int extend_desc); + unsigned int (*is_jumbo_frm) (int len, int ehn_desc); + int (*jumbo_frm)(void *priv, struct sk_buff *skb, int csum); + int (*set_16kib_bfsize)(int mtu); + void (*init_desc3)(struct dma_desc *p); + void (*refill_desc3) (void *priv, struct dma_desc *p); + void (*clean_desc3) (void *priv, struct dma_desc *p); +}; + +#define stmmac_mode_init(__priv, __args...) \ + stmmac_do_void_callback(__priv, mode, init, __args) +#define stmmac_is_jumbo_frm(__priv, __args...) \ + stmmac_do_callback(__priv, mode, is_jumbo_frm, __args) +#define stmmac_jumbo_frm(__priv, __args...) \ + stmmac_do_callback(__priv, mode, jumbo_frm, __args) +#define stmmac_set_16kib_bfsize(__priv, __args...) \ + stmmac_do_callback(__priv, mode, set_16kib_bfsize, __args) +#define stmmac_init_desc3(__priv, __args...) \ + stmmac_do_void_callback(__priv, mode, init_desc3, __args) +#define stmmac_refill_desc3(__priv, __args...) \ + stmmac_do_void_callback(__priv, mode, refill_desc3, __args) +#define stmmac_clean_desc3(__priv, __args...) 
\ + stmmac_do_void_callback(__priv, mode, clean_desc3, __args) + +struct stmmac_priv; + +extern const struct stmmac_ops dwmac100_ops; +extern const struct stmmac_dma_ops dwmac100_dma_ops; +extern const struct stmmac_ops dwmac1000_ops; +extern const struct stmmac_dma_ops dwmac1000_dma_ops; +extern const struct stmmac_ops dwmac4_ops; +extern const struct stmmac_dma_ops dwmac4_dma_ops; +extern const struct stmmac_ops dwmac410_ops; +extern const struct stmmac_dma_ops dwmac410_dma_ops; +extern const struct stmmac_ops dwmac510_ops; + +#define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ +#define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */ + +int stmmac_hwif_init(struct stmmac_priv *priv); + +#endif /* __STMMAC_HWIF_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index ebd9e5e00f16..7b1d901bf5bc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -253,7 +253,7 @@ static int ndesc_get_tx_timestamp_status(struct dma_desc *p) return (le32_to_cpu(p->des0) & TDES0_TIME_STAMP_STATUS) >> 17; } -static u64 ndesc_get_timestamp(void *desc, u32 ats) +static void ndesc_get_timestamp(void *desc, u32 ats, u64 *ts) { struct dma_desc *p = (struct dma_desc *)desc; u64 ns; @@ -262,7 +262,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats) /* convert high/sec time stamp value to nanosecond */ ns += le32_to_cpu(p->des3) * 1000000000ULL; - return ns; + *ts = ns; } static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index 28e4b5d50ce6..a7ffc73fffe8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -24,7 +24,7 @@ #include "stmmac.h" -static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) +static int jumbo_frm(void *p, struct sk_buff *skb, int csum) { struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p; unsigned int nopaged_len = skb_headlen(skb); @@ -58,9 +58,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].is_jumbo = true; desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); - priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, - STMMAC_RING_MODE, 0, - false, skb->len); + stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, + STMMAC_RING_MODE, 0, false, skb->len); tx_q->tx_skbuff[entry] = NULL; entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); @@ -79,9 +78,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].is_jumbo = true; desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); - priv->hw->desc->prepare_tx_desc(desc, 0, len, csum, - STMMAC_RING_MODE, 1, - true, skb->len); + stmmac_prepare_tx_desc(priv, desc, 0, len, csum, + STMMAC_RING_MODE, 1, true, skb->len); } else { des2 = dma_map_single(priv->device, skb->data, nopaged_len, DMA_TO_DEVICE); @@ -92,9 +90,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].len = nopaged_len; tx_q->tx_skbuff_dma[entry].is_jumbo = true; desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); - priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum, - STMMAC_RING_MODE, 0, - true, skb->len); + stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum, + STMMAC_RING_MODE, 0, true, skb->len); } tx_q->cur_tx = entry; @@ -102,7 +99,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int 
csum) return entry; } -static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc) +static unsigned int is_jumbo_frm(int len, int enh_desc) { unsigned int ret = 0; @@ -112,7 +109,7 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc) return ret; } -static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) +static void refill_desc3(void *priv_ptr, struct dma_desc *p) { struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; @@ -122,12 +119,12 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) } /* In ring mode we need to fill the desc3 because it is used as buffer */ -static void stmmac_init_desc3(struct dma_desc *p) +static void init_desc3(struct dma_desc *p) { p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB); } -static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) +static void clean_desc3(void *priv_ptr, struct dma_desc *p) { struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr; struct stmmac_priv *priv = tx_q->priv_data; @@ -140,7 +137,7 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) p->des3 = 0; } -static int stmmac_set_16kib_bfsize(int mtu) +static int set_16kib_bfsize(int mtu) { int ret = 0; if (unlikely(mtu >= BUF_SIZE_8KiB)) @@ -149,10 +146,10 @@ static int stmmac_set_16kib_bfsize(int mtu) } const struct stmmac_mode_ops ring_mode_ops = { - .is_jumbo_frm = stmmac_is_jumbo_frm, - .jumbo_frm = stmmac_jumbo_frm, - .refill_desc3 = stmmac_refill_desc3, - .init_desc3 = stmmac_init_desc3, - .clean_desc3 = stmmac_clean_desc3, - .set_16kib_bfsize = stmmac_set_16kib_bfsize, + .is_jumbo_frm = is_jumbo_frm, + .jumbo_frm = jumbo_frm, + .refill_desc3 = refill_desc3, + .init_desc3 = init_desc3, + .clean_desc3 = clean_desc3, + .set_16kib_bfsize = set_16kib_bfsize, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index da50451f8999..2443f20e07bf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -130,6 +130,7 @@ struct stmmac_priv { int eee_active; int tx_lpi_timer; unsigned int mode; + unsigned int chain_mode; int extend_desc; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 2c6ed47704fc..6d82b3ef5c3b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -291,11 +291,9 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev, cmd->base.speed = priv->xstats.pcs_speed; /* Get and convert ADV/LP_ADV from the HW AN registers */ - if (!priv->hw->mac->pcs_get_adv_lp) + if (stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv)) return -EOPNOTSUPP; /* should never happen indeed */ - priv->hw->mac->pcs_get_adv_lp(priv->ioaddr, &adv); - /* Encoding of PSE bits is defined in 802.3z, 37.2.1.4 */ ethtool_convert_link_mode_to_legacy_u32( @@ -393,11 +391,7 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev, ADVERTISED_10baseT_Full); spin_lock(&priv->lock); - - if (priv->hw->mac->pcs_ctrl_ane) - priv->hw->mac->pcs_ctrl_ane(priv->ioaddr, 1, - priv->hw->ps, 0); - + stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0); spin_unlock(&priv->lock); return 0; @@ -442,8 +436,8 @@ static void stmmac_ethtool_gregs(struct net_device *dev, memset(reg_space, 0x0, REG_SPACE_SIZE); - priv->hw->mac->dump_regs(priv->hw, reg_space); - 
priv->hw->dma->dump_regs(priv->ioaddr, reg_space); + stmmac_dump_mac_regs(priv, priv->hw, reg_space); + stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space); /* Copy DMA registers to where ethtool expects them */ memcpy(&reg_space[ETHTOOL_DMA_OFFSET], &reg_space[DMA_BUS_MODE / 4], NUM_DWMAC1000_DMA_REGS * 4); @@ -454,15 +448,13 @@ stmmac_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct stmmac_priv *priv = netdev_priv(netdev); + struct rgmii_adv adv_lp; pause->rx_pause = 0; pause->tx_pause = 0; - if (priv->hw->pcs && priv->hw->mac->pcs_get_adv_lp) { - struct rgmii_adv adv_lp; - + if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) { pause->autoneg = 1; - priv->hw->mac->pcs_get_adv_lp(priv->ioaddr, &adv_lp); if (!adv_lp.pause) return; } else { @@ -488,12 +480,10 @@ stmmac_set_pauseparam(struct net_device *netdev, u32 tx_cnt = priv->plat->tx_queues_to_use; struct phy_device *phy = netdev->phydev; int new_pause = FLOW_OFF; + struct rgmii_adv adv_lp; - if (priv->hw->pcs && priv->hw->mac->pcs_get_adv_lp) { - struct rgmii_adv adv_lp; - + if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) { pause->autoneg = 1; - priv->hw->mac->pcs_get_adv_lp(priv->ioaddr, &adv_lp); if (!adv_lp.pause) return -EOPNOTSUPP; } else { @@ -515,37 +505,32 @@ stmmac_set_pauseparam(struct net_device *netdev, return phy_start_aneg(phy); } - priv->hw->mac->flow_ctrl(priv->hw, phy->duplex, priv->flow_ctrl, - priv->pause, tx_cnt); + stmmac_flow_ctrl(priv, priv->hw, phy->duplex, priv->flow_ctrl, + priv->pause, tx_cnt); return 0; } static void stmmac_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *dummy, u64 *data) { - const char *(*dump)(struct stmmac_safety_stats *stats, int index, - unsigned long *count); struct stmmac_priv *priv = netdev_priv(dev); u32 rx_queues_count = priv->plat->rx_queues_to_use; u32 tx_queues_count = priv->plat->tx_queues_to_use; unsigned long count; - int i, j = 0; - - if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) { - dump = priv->hw->mac->safety_feat_dump; + int i, j = 0, ret; + if (priv->dma_cap.asp) { for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) { - if (dump(&priv->sstats, i, &count)) + if (!stmmac_safety_feat_dump(priv, &priv->sstats, i, + &count, NULL)) data[j++] = count; } } /* Update the DMA HW counters for dwmac10/100 */ - if (priv->hw->dma->dma_diagnostic_fr) - priv->hw->dma->dma_diagnostic_fr(&dev->stats, - (void *) &priv->xstats, - priv->ioaddr); - else { + ret = stmmac_dma_diagnostic_fr(priv, &dev->stats, (void *) &priv->xstats, + priv->ioaddr); + if (ret) { /* If supported, for new GMAC chips expose the MMC counters */ if (priv->dma_cap.rmon) { dwmac_mmc_read(priv->mmcaddr, &priv->mmc); @@ -565,11 +550,10 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, priv->xstats.phy_eee_wakeup_error_n = val; } - if ((priv->hw->mac->debug) && - (priv->synopsys_id >= DWMAC_CORE_3_50)) - priv->hw->mac->debug(priv->ioaddr, - (void *)&priv->xstats, - rx_queues_count, tx_queues_count); + if (priv->synopsys_id >= DWMAC_CORE_3_50) + stmmac_mac_debug(priv, priv->ioaddr, + (void *)&priv->xstats, + rx_queues_count, tx_queues_count); } for (i = 0; i < STMMAC_STATS_LEN; i++) { char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset; @@ -581,8 +565,6 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, static int stmmac_get_sset_count(struct net_device *netdev, int sset) { struct stmmac_priv *priv = netdev_priv(netdev); - const char *(*dump)(struct stmmac_safety_stats *stats, int
index, - unsigned long *count); int i, len, safety_len = 0; switch (sset) { @@ -591,11 +573,11 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset) if (priv->dma_cap.rmon) len += STMMAC_MMC_STATS_LEN; - if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) { - dump = priv->hw->mac->safety_feat_dump; - + if (priv->dma_cap.asp) { for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) { - if (dump(&priv->sstats, i, NULL)) + if (!stmmac_safety_feat_dump(priv, + &priv->sstats, i, + NULL, NULL)) safety_len++; } @@ -613,17 +595,15 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data) int i; u8 *p = data; struct stmmac_priv *priv = netdev_priv(dev); - const char *(*dump)(struct stmmac_safety_stats *stats, int index, - unsigned long *count); switch (stringset) { case ETH_SS_STATS: - if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) { - dump = priv->hw->mac->safety_feat_dump; + if (priv->dma_cap.asp) { for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) { - const char *desc = dump(&priv->sstats, i, NULL); - - if (desc) { + const char *desc; + if (!stmmac_safety_feat_dump(priv, + &priv->sstats, i, + NULL, &desc)) { memcpy(p, desc, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } @@ -810,7 +790,7 @@ static int stmmac_set_coalesce(struct net_device *dev, priv->tx_coal_frames = ec->tx_max_coalesced_frames; priv->tx_coal_timer = ec->tx_coalesce_usecs; priv->rx_riwt = rx_riwt; - priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt, rx_cnt); + stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 08c19ebd5306..8d9cc2157afd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -24,13 +24,13 @@ #include "common.h" #include "stmmac_ptp.h" -static void stmmac_config_hw_tstamping(void __iomem *ioaddr, u32 data) +static void config_hw_tstamping(void __iomem *ioaddr, u32 data) { writel(data, ioaddr + PTP_TCR); } -static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, - u32 ptp_clock, int gmac4) +static void config_sub_second_increment(void __iomem *ioaddr, + u32 ptp_clock, int gmac4, u32 *ssinc) { u32 value = readl(ioaddr + PTP_TCR); unsigned long data; @@ -57,10 +57,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, writel(reg_value, ioaddr + PTP_SSIR); - return data; + if (ssinc) + *ssinc = data; } -static int stmmac_init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) +static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) { int limit; u32 value; @@ -85,7 +86,7 @@ static int stmmac_init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) return 0; } -static int stmmac_config_addend(void __iomem *ioaddr, u32 addend) +static int config_addend(void __iomem *ioaddr, u32 addend) { u32 value; int limit; @@ -109,8 +110,8 @@ static int stmmac_config_addend(void __iomem *ioaddr, u32 addend) return 0; } -static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, - int add_sub, int gmac4) +static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, + int add_sub, int gmac4) { u32 value; int limit; @@ -152,7 +153,7 @@ static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, return 0; } -static u64 stmmac_get_systime(void __iomem *ioaddr) +static void get_systime(void __iomem *ioaddr, u64 *systime) { u64 ns; @@ -161,14 +162,15 @@ static u64 stmmac_get_systime(void __iomem 
*ioaddr) /* Get the TSS and convert sec time value to nanosecond */ ns += readl(ioaddr + PTP_STSR) * 1000000000ULL; - return ns; + if (systime) + *systime = ns; } const struct stmmac_hwtimestamp stmmac_ptp = { - .config_hw_tstamping = stmmac_config_hw_tstamping, - .init_systime = stmmac_init_systime, - .config_sub_second_increment = stmmac_config_sub_second_increment, - .config_addend = stmmac_config_addend, - .adjust_systime = stmmac_adjust_systime, - .get_systime = stmmac_get_systime, + .config_hw_tstamping = config_hw_tstamping, + .init_systime = init_systime, + .config_sub_second_increment = config_sub_second_increment, + .config_addend = config_addend, + .adjust_systime = adjust_systime, + .get_systime = get_systime, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b65e2d144698..b935478df55f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -50,6 +50,7 @@ #include <linux/reset.h> #include <linux/of_mdio.h> #include "dwmac1000.h" +#include "hwif.h" #define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x) #define TSO_MAX_BUFF_SIZE (SZ_16K - 1) @@ -335,8 +336,8 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv) /* Check and enter in LPI mode */ if (!priv->tx_path_in_lpi_mode) - priv->hw->mac->set_eee_mode(priv->hw, - priv->plat->en_tx_lpi_clockgating); + stmmac_set_eee_mode(priv, priv->hw, + priv->plat->en_tx_lpi_clockgating); } /** @@ -347,7 +348,7 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv) */ void stmmac_disable_eee_mode(struct stmmac_priv *priv) { - priv->hw->mac->reset_eee_mode(priv->hw); + stmmac_reset_eee_mode(priv, priv->hw); del_timer_sync(&priv->eee_ctrl_timer); priv->tx_path_in_lpi_mode = false; } @@ -410,8 +411,8 @@ bool stmmac_eee_init(struct stmmac_priv *priv) if (priv->eee_active) { netdev_dbg(priv->dev, "disable EEE\n"); del_timer_sync(&priv->eee_ctrl_timer); - priv->hw->mac->set_eee_timer(priv->hw, 0, - tx_lpi_timer); + stmmac_set_eee_timer(priv, priv->hw, 0, + tx_lpi_timer); } priv->eee_active = 0; spin_unlock_irqrestore(&priv->lock, flags); @@ -426,12 +427,11 @@ bool stmmac_eee_init(struct stmmac_priv *priv) mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); - priv->hw->mac->set_eee_timer(priv->hw, - STMMAC_DEFAULT_LIT_LS, - tx_lpi_timer); + stmmac_set_eee_timer(priv, priv->hw, + STMMAC_DEFAULT_LIT_LS, tx_lpi_timer); } /* Set HW EEE according to the speed */ - priv->hw->mac->set_eee_pls(priv->hw, ndev->phydev->link); + stmmac_set_eee_pls(priv, priv->hw, ndev->phydev->link); ret = true; spin_unlock_irqrestore(&priv->lock, flags); @@ -464,9 +464,9 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, return; /* check tx tstamp status */ - if (priv->hw->desc->get_tx_timestamp_status(p)) { + if (stmmac_get_tx_timestamp_status(priv, p)) { /* get the valid tstamp */ - ns = priv->hw->desc->get_timestamp(p, priv->adv_ts); + stmmac_get_timestamp(priv, p, priv->adv_ts, &ns); memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); shhwtstamp.hwtstamp = ns_to_ktime(ns); @@ -502,8 +502,8 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, desc = np; /* Check if timestamp is available */ - if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) { - ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); + if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) { + stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns); netdev_dbg(priv->dev, "get valid 
RX hw timestamp %llu\n", ns); shhwtstamp = skb_hwtstamps(skb); memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); @@ -707,18 +707,18 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON; if (!priv->hwts_tx_en && !priv->hwts_rx_en) - priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, 0); + stmmac_config_hw_tstamping(priv, priv->ptpaddr, 0); else { value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR | tstamp_all | ptp_v2 | ptp_over_ethernet | ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en | ts_master_en | snap_type_sel); - priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, value); + stmmac_config_hw_tstamping(priv, priv->ptpaddr, value); /* program Sub Second Increment reg */ - sec_inc = priv->hw->ptp->config_sub_second_increment( - priv->ptpaddr, priv->plat->clk_ptp_rate, - priv->plat->has_gmac4); + stmmac_config_sub_second_increment(priv, + priv->ptpaddr, priv->plat->clk_ptp_rate, + priv->plat->has_gmac4, &sec_inc); temp = div_u64(1000000000ULL, sec_inc); /* calculate default added value: @@ -728,15 +728,14 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) */ temp = (u64)(temp << 32); priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate); - priv->hw->ptp->config_addend(priv->ptpaddr, - priv->default_addend); + stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend); /* initialize system time */ ktime_get_real_ts64(&now); /* lower 32 bits of tv_sec are safe until y2106 */ - priv->hw->ptp->init_systime(priv->ptpaddr, (u32)now.tv_sec, - now.tv_nsec); + stmmac_init_systime(priv, priv->ptpaddr, + (u32)now.tv_sec, now.tv_nsec); } return copy_to_user(ifr->ifr_data, &config, @@ -770,7 +769,6 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) netdev_info(priv->dev, "IEEE 1588-2008 Advanced Timestamp supported\n"); - priv->hw->ptp = &stmmac_ptp; priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; @@ -795,8 +793,8 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex) { u32 tx_cnt = priv->plat->tx_queues_to_use; - priv->hw->mac->flow_ctrl(priv->hw, duplex, priv->flow_ctrl, - priv->pause, tx_cnt); + stmmac_flow_ctrl(priv, priv->hw, duplex, priv->flow_ctrl, + priv->pause, tx_cnt); } /** @@ -1008,7 +1006,7 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv) head_rx = (void *)rx_q->dma_rx; /* Display RX ring */ - priv->hw->desc->display_ring(head_rx, DMA_RX_SIZE, true); + stmmac_display_ring(priv, head_rx, DMA_RX_SIZE, true); } } @@ -1029,7 +1027,7 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv) else head_tx = (void *)tx_q->dma_tx; - priv->hw->desc->display_ring(head_tx, DMA_TX_SIZE, false); + stmmac_display_ring(priv, head_tx, DMA_TX_SIZE, false); } } @@ -1073,13 +1071,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue) /* Clear the RX descriptors */ for (i = 0; i < DMA_RX_SIZE; i++) if (priv->extend_desc) - priv->hw->desc->init_rx_desc(&rx_q->dma_erx[i].basic, - priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic, + priv->use_riwt, priv->mode, + (i == DMA_RX_SIZE - 1)); else - priv->hw->desc->init_rx_desc(&rx_q->dma_rx[i], - priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + stmmac_init_rx_desc(priv, &rx_q->dma_rx[i], + priv->use_riwt, priv->mode, + (i == DMA_RX_SIZE - 1)); } /** @@ -1097,13 +1095,11 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, u32 queue) /* Clear the TX descriptors */ for (i = 0; i 
< DMA_TX_SIZE; i++) if (priv->extend_desc) - priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic, - priv->mode, - (i == DMA_TX_SIZE - 1)); + stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic, + priv->mode, (i == DMA_TX_SIZE - 1)); else - priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i], - priv->mode, - (i == DMA_TX_SIZE - 1)); + stmmac_init_tx_desc(priv, &tx_q->dma_tx[i], + priv->mode, (i == DMA_TX_SIZE - 1)); } /** @@ -1164,9 +1160,8 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, else p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[i]); - if ((priv->hw->mode->init_desc3) && - (priv->dma_buf_sz == BUF_SIZE_16KiB)) - priv->hw->mode->init_desc3(p); + if (priv->dma_buf_sz == BUF_SIZE_16KiB) + stmmac_init_desc3(priv, p); return 0; } @@ -1232,13 +1227,14 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) { struct stmmac_priv *priv = netdev_priv(dev); u32 rx_count = priv->plat->rx_queues_to_use; - unsigned int bfsize = 0; int ret = -ENOMEM; + int bfsize = 0; int queue; int i; - if (priv->hw->mode->set_16kib_bfsize) - bfsize = priv->hw->mode->set_16kib_bfsize(dev->mtu); + bfsize = stmmac_set_16kib_bfsize(priv, dev->mtu); + if (bfsize < 0) + bfsize = 0; if (bfsize < BUF_SIZE_16KiB) bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz); @@ -1282,13 +1278,11 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) /* Setup the chained descriptor addresses */ if (priv->mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) - priv->hw->mode->init(rx_q->dma_erx, - rx_q->dma_rx_phy, - DMA_RX_SIZE, 1); + stmmac_mode_init(priv, rx_q->dma_erx, + rx_q->dma_rx_phy, DMA_RX_SIZE, 1); else - priv->hw->mode->init(rx_q->dma_rx, - rx_q->dma_rx_phy, - DMA_RX_SIZE, 0); + stmmac_mode_init(priv, rx_q->dma_rx, + rx_q->dma_rx_phy, DMA_RX_SIZE, 0); } } @@ -1335,13 +1329,11 @@ static int init_dma_tx_desc_rings(struct net_device *dev) /* Setup the chained descriptor addresses */ if (priv->mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) - priv->hw->mode->init(tx_q->dma_etx, - tx_q->dma_tx_phy, - DMA_TX_SIZE, 1); + stmmac_mode_init(priv, tx_q->dma_etx, + tx_q->dma_tx_phy, DMA_TX_SIZE, 1); else - priv->hw->mode->init(tx_q->dma_tx, - tx_q->dma_tx_phy, - DMA_TX_SIZE, 0); + stmmac_mode_init(priv, tx_q->dma_tx, + tx_q->dma_tx_phy, DMA_TX_SIZE, 0); } for (i = 0; i < DMA_TX_SIZE; i++) { @@ -1664,7 +1656,7 @@ static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv) for (queue = 0; queue < rx_queues_count; queue++) { mode = priv->plat->rx_queues_cfg[queue].mode_to_use; - priv->hw->mac->rx_queue_enable(priv->hw, mode, queue); + stmmac_rx_queue_enable(priv, priv->hw, mode, queue); } } @@ -1678,7 +1670,7 @@ static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv) static void stmmac_start_rx_dma(struct stmmac_priv *priv, u32 chan) { netdev_dbg(priv->dev, "DMA RX processes started in channel %d\n", chan); - priv->hw->dma->start_rx(priv->ioaddr, chan); + stmmac_start_rx(priv, priv->ioaddr, chan); } /** @@ -1691,7 +1683,7 @@ static void stmmac_start_rx_dma(struct stmmac_priv *priv, u32 chan) static void stmmac_start_tx_dma(struct stmmac_priv *priv, u32 chan) { netdev_dbg(priv->dev, "DMA TX processes started in channel %d\n", chan); - priv->hw->dma->start_tx(priv->ioaddr, chan); + stmmac_start_tx(priv, priv->ioaddr, chan); } /** @@ -1704,7 +1696,7 @@ static void stmmac_start_tx_dma(struct stmmac_priv *priv, u32 chan) static void stmmac_stop_rx_dma(struct stmmac_priv *priv, u32 chan) { netdev_dbg(priv->dev, "DMA RX processes stopped in channel %d\n", 
chan); - priv->hw->dma->stop_rx(priv->ioaddr, chan); + stmmac_stop_rx(priv, priv->ioaddr, chan); } /** @@ -1717,7 +1709,7 @@ static void stmmac_stop_rx_dma(struct stmmac_priv *priv, u32 chan) static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan) { netdev_dbg(priv->dev, "DMA TX processes stopped in channel %d\n", chan); - priv->hw->dma->stop_tx(priv->ioaddr, chan); + stmmac_stop_tx(priv, priv->ioaddr, chan); } /** @@ -1808,19 +1800,18 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) for (chan = 0; chan < rx_channels_count; chan++) { qmode = priv->plat->rx_queues_cfg[chan].mode_to_use; - priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, - rxfifosz, qmode); + stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, + rxfifosz, qmode); } for (chan = 0; chan < tx_channels_count; chan++) { qmode = priv->plat->tx_queues_cfg[chan].mode_to_use; - priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan, - txfifosz, qmode); + stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, + txfifosz, qmode); } } else { - priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, - rxfifosz); + stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz); } } @@ -1851,9 +1842,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) else p = tx_q->dma_tx + entry; - status = priv->hw->desc->tx_status(&priv->dev->stats, - &priv->xstats, p, - priv->ioaddr); + status = stmmac_tx_status(priv, &priv->dev->stats, + &priv->xstats, p, priv->ioaddr); /* Check if the descriptor is owned by the DMA */ if (unlikely(status & tx_dma_own)) break; @@ -1891,8 +1881,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) tx_q->tx_skbuff_dma[entry].map_as_page = false; } - if (priv->hw->mode->clean_desc3) - priv->hw->mode->clean_desc3(tx_q, p); + stmmac_clean_desc3(priv, tx_q, p); tx_q->tx_skbuff_dma[entry].last_segment = false; tx_q->tx_skbuff_dma[entry].is_jumbo = false; @@ -1904,7 +1893,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) tx_q->tx_skbuff[entry] = NULL; } - priv->hw->desc->release_tx_desc(p, priv->mode); + stmmac_release_tx_desc(priv, p, priv->mode); entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); } @@ -1929,16 +1918,6 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) netif_tx_unlock(priv->dev); } -static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv, u32 chan) -{ - priv->hw->dma->enable_dma_irq(priv->ioaddr, chan); -} - -static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv, u32 chan) -{ - priv->hw->dma->disable_dma_irq(priv->ioaddr, chan); -} - /** * stmmac_tx_err - to manage the tx error * @priv: driver private structure @@ -1957,13 +1936,11 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan) dma_free_tx_skbufs(priv, chan); for (i = 0; i < DMA_TX_SIZE; i++) if (priv->extend_desc) - priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic, - priv->mode, - (i == DMA_TX_SIZE - 1)); + stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic, + priv->mode, (i == DMA_TX_SIZE - 1)); else - priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i], - priv->mode, - (i == DMA_TX_SIZE - 1)); + stmmac_init_tx_desc(priv, &tx_q->dma_tx[i], + priv->mode, (i == DMA_TX_SIZE - 1)); tx_q->dirty_tx = 0; tx_q->cur_tx = 0; tx_q->mss = 0; @@ -2004,30 +1981,30 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, txfifosz /= tx_channels_count; if (priv->synopsys_id >= DWMAC_CORE_4_00) { - priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, - rxfifosz, rxqmode); - priv->hw->dma->dma_tx_mode(priv->ioaddr, 
txmode, chan, - txfifosz, txqmode); + stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz, + rxqmode); + stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, txfifosz, + txqmode); } else { - priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, - rxfifosz); + stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz); } } static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv) { - bool ret = false; + int ret = false; /* Safety features are only available in cores >= 5.10 */ if (priv->synopsys_id < DWMAC_CORE_5_10) return ret; - if (priv->hw->mac->safety_feat_irq_status) - ret = priv->hw->mac->safety_feat_irq_status(priv->dev, - priv->ioaddr, priv->dma_cap.asp, &priv->sstats); - - if (ret) + ret = stmmac_safety_feat_irq_status(priv, priv->dev, + priv->ioaddr, priv->dma_cap.asp, &priv->sstats); + if (ret && (ret != -EINVAL)) { stmmac_global_err(priv); - return ret; + return true; + } + + return false; } /** @@ -2045,7 +2022,11 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) tx_channel_count : rx_channel_count; u32 chan; bool poll_scheduled = false; - int status[channels_to_check]; + int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)]; + + /* Make sure we never check beyond our status buffer. */ + if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status))) + channels_to_check = ARRAY_SIZE(status); /* Each DMA channel can be used for rx and tx simultaneously, yet * napi_struct is embedded in struct stmmac_rx_queue rather than in a @@ -2054,16 +2035,15 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) * all tx queues rather than just a single tx queue. */ for (chan = 0; chan < channels_to_check; chan++) - status[chan] = priv->hw->dma->dma_interrupt(priv->ioaddr, - &priv->xstats, - chan); + status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr, + &priv->xstats, chan); for (chan = 0; chan < rx_channel_count; chan++) { if (likely(status[chan] & handle_rx)) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; if (likely(napi_schedule_prep(&rx_q->napi))) { - stmmac_disable_dma_irq(priv, chan); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan); __napi_schedule(&rx_q->napi); poll_scheduled = true; } @@ -2084,7 +2064,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) &priv->rx_queue[0]; if (likely(napi_schedule_prep(&rx_q->napi))) { - stmmac_disable_dma_irq(priv, chan); + stmmac_disable_dma_irq(priv, + priv->ioaddr, chan); __napi_schedule(&rx_q->napi); } break; @@ -2144,32 +2125,6 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv) } /** - * stmmac_selec_desc_mode - to select among: normal/alternate/extend descriptors - * @priv: driver private structure - * Description: select the Enhanced/Alternate or Normal descriptors. - * In case of Enhanced/Alternate, it checks if the extended descriptors are - * supported by the HW capability register. - */ -static void stmmac_selec_desc_mode(struct stmmac_priv *priv) -{ - if (priv->plat->enh_desc) { - dev_info(priv->device, "Enhanced/Alternate descriptors\n"); - - /* GMAC older than 3.50 has no extended descriptors */ - if (priv->synopsys_id >= DWMAC_CORE_3_50) { - dev_info(priv->device, "Enabled extended descriptors\n"); - priv->extend_desc = 1; - } else - dev_warn(priv->device, "Extended descriptors not supported\n"); - - priv->hw->desc = &enh_desc_ops; - } else { - dev_info(priv->device, "Normal descriptors\n"); - priv->hw->desc = &ndesc_ops; - } -} - -/** * stmmac_get_hw_features - get MAC capabilities from the HW cap. register. 
* @priv: driver private structure * Description: @@ -2180,15 +2135,7 @@ static void stmmac_selec_desc_mode(struct stmmac_priv *priv) */ static int stmmac_get_hw_features(struct stmmac_priv *priv) { - u32 ret = 0; - - if (priv->hw->dma->get_hw_feature) { - priv->hw->dma->get_hw_feature(priv->ioaddr, - &priv->dma_cap); - ret = 1; - } - - return ret; + return stmmac_get_hw_feature(priv, priv->ioaddr, &priv->dma_cap) == 0; } /** @@ -2201,8 +2148,7 @@ static int stmmac_get_hw_features(struct stmmac_priv *priv) static void stmmac_check_ether_addr(struct stmmac_priv *priv) { if (!is_valid_ether_addr(priv->dev->dev_addr)) { - priv->hw->mac->get_umac_addr(priv->hw, - priv->dev->dev_addr, 0); + stmmac_get_umac_addr(priv, priv->hw, priv->dev->dev_addr, 0); if (!is_valid_ether_addr(priv->dev->dev_addr)) eth_hw_addr_random(priv->dev); netdev_info(priv->dev, "device MAC address %pM\n", @@ -2238,7 +2184,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE)) atds = 1; - ret = priv->hw->dma->reset(priv->ioaddr); + ret = stmmac_reset(priv, priv->ioaddr); if (ret) { dev_err(priv->device, "Failed to reset the dma\n"); return ret; @@ -2246,51 +2192,48 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) if (priv->synopsys_id >= DWMAC_CORE_4_00) { /* DMA Configuration */ - priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg, - dummy_dma_tx_phy, dummy_dma_rx_phy, atds); + stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg, + dummy_dma_tx_phy, dummy_dma_rx_phy, atds); /* DMA RX Channel Configuration */ for (chan = 0; chan < rx_channels_count; chan++) { rx_q = &priv->rx_queue[chan]; - priv->hw->dma->init_rx_chan(priv->ioaddr, - priv->plat->dma_cfg, - rx_q->dma_rx_phy, chan); + stmmac_init_rx_chan(priv, priv->ioaddr, + priv->plat->dma_cfg, rx_q->dma_rx_phy, + chan); rx_q->rx_tail_addr = rx_q->dma_rx_phy + (DMA_RX_SIZE * sizeof(struct dma_desc)); - priv->hw->dma->set_rx_tail_ptr(priv->ioaddr, - rx_q->rx_tail_addr, - chan); + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, + rx_q->rx_tail_addr, chan); } /* DMA TX Channel Configuration */ for (chan = 0; chan < tx_channels_count; chan++) { tx_q = &priv->tx_queue[chan]; - priv->hw->dma->init_chan(priv->ioaddr, - priv->plat->dma_cfg, - chan); + stmmac_init_chan(priv, priv->ioaddr, + priv->plat->dma_cfg, chan); - priv->hw->dma->init_tx_chan(priv->ioaddr, - priv->plat->dma_cfg, - tx_q->dma_tx_phy, chan); + stmmac_init_tx_chan(priv, priv->ioaddr, + priv->plat->dma_cfg, tx_q->dma_tx_phy, + chan); tx_q->tx_tail_addr = tx_q->dma_tx_phy + (DMA_TX_SIZE * sizeof(struct dma_desc)); - priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, - tx_q->tx_tail_addr, - chan); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, + tx_q->tx_tail_addr, chan); } } else { rx_q = &priv->rx_queue[chan]; tx_q = &priv->tx_queue[chan]; - priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg, - tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds); + stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg, + tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds); } - if (priv->plat->axi && priv->hw->dma->axi) - priv->hw->dma->axi(priv->ioaddr, priv->plat->axi); + if (priv->plat->axi) + stmmac_axi(priv, priv->ioaddr, priv->plat->axi); return ret; } @@ -2336,18 +2279,14 @@ static void stmmac_set_rings_length(struct stmmac_priv *priv) u32 chan; /* set TX ring length */ - if (priv->hw->dma->set_tx_ring_len) { - for (chan = 0; chan < tx_channels_count; chan++) - priv->hw->dma->set_tx_ring_len(priv->ioaddr, - (DMA_TX_SIZE - 1), chan); - } + for (chan = 0; chan < 
tx_channels_count; chan++) + stmmac_set_tx_ring_len(priv, priv->ioaddr, + (DMA_TX_SIZE - 1), chan); /* set RX ring length */ - if (priv->hw->dma->set_rx_ring_len) { - for (chan = 0; chan < rx_channels_count; chan++) - priv->hw->dma->set_rx_ring_len(priv->ioaddr, - (DMA_RX_SIZE - 1), chan); - } + for (chan = 0; chan < rx_channels_count; chan++) + stmmac_set_rx_ring_len(priv, priv->ioaddr, + (DMA_RX_SIZE - 1), chan); } /** @@ -2363,7 +2302,7 @@ static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv) for (queue = 0; queue < tx_queues_count; queue++) { weight = priv->plat->tx_queues_cfg[queue].weight; - priv->hw->mac->set_mtl_tx_queue_weight(priv->hw, weight, queue); + stmmac_set_mtl_tx_queue_weight(priv, priv->hw, weight, queue); } } @@ -2384,7 +2323,7 @@ static void stmmac_configure_cbs(struct stmmac_priv *priv) if (mode_to_use == MTL_QUEUE_DCB) continue; - priv->hw->mac->config_cbs(priv->hw, + stmmac_config_cbs(priv, priv->hw, priv->plat->tx_queues_cfg[queue].send_slope, priv->plat->tx_queues_cfg[queue].idle_slope, priv->plat->tx_queues_cfg[queue].high_credit, @@ -2406,7 +2345,7 @@ static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv) for (queue = 0; queue < rx_queues_count; queue++) { chan = priv->plat->rx_queues_cfg[queue].chan; - priv->hw->mac->map_mtl_to_dma(priv->hw, queue, chan); + stmmac_map_mtl_to_dma(priv, priv->hw, queue, chan); } } @@ -2426,7 +2365,7 @@ static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv) continue; prio = priv->plat->rx_queues_cfg[queue].prio; - priv->hw->mac->rx_queue_prio(priv->hw, prio, queue); + stmmac_rx_queue_prio(priv, priv->hw, prio, queue); } } @@ -2446,7 +2385,7 @@ static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv) continue; prio = priv->plat->tx_queues_cfg[queue].prio; - priv->hw->mac->tx_queue_prio(priv->hw, prio, queue); + stmmac_tx_queue_prio(priv, priv->hw, prio, queue); } } @@ -2467,7 +2406,7 @@ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv) continue; packet = priv->plat->rx_queues_cfg[queue].pkt_route; - priv->hw->mac->rx_queue_routing(priv->hw, packet, queue); + stmmac_rx_queue_routing(priv, priv->hw, packet, queue); } } @@ -2481,50 +2420,47 @@ static void stmmac_mtl_configuration(struct stmmac_priv *priv) u32 rx_queues_count = priv->plat->rx_queues_to_use; u32 tx_queues_count = priv->plat->tx_queues_to_use; - if (tx_queues_count > 1 && priv->hw->mac->set_mtl_tx_queue_weight) + if (tx_queues_count > 1) stmmac_set_tx_queue_weight(priv); /* Configure MTL RX algorithms */ - if (rx_queues_count > 1 && priv->hw->mac->prog_mtl_rx_algorithms) - priv->hw->mac->prog_mtl_rx_algorithms(priv->hw, - priv->plat->rx_sched_algorithm); + if (rx_queues_count > 1) + stmmac_prog_mtl_rx_algorithms(priv, priv->hw, + priv->plat->rx_sched_algorithm); /* Configure MTL TX algorithms */ - if (tx_queues_count > 1 && priv->hw->mac->prog_mtl_tx_algorithms) - priv->hw->mac->prog_mtl_tx_algorithms(priv->hw, - priv->plat->tx_sched_algorithm); + if (tx_queues_count > 1) + stmmac_prog_mtl_tx_algorithms(priv, priv->hw, + priv->plat->tx_sched_algorithm); /* Configure CBS in AVB TX queues */ - if (tx_queues_count > 1 && priv->hw->mac->config_cbs) + if (tx_queues_count > 1) stmmac_configure_cbs(priv); /* Map RX MTL to DMA channels */ - if (priv->hw->mac->map_mtl_to_dma) - stmmac_rx_queue_dma_chan_map(priv); + stmmac_rx_queue_dma_chan_map(priv); /* Enable MAC RX Queues */ - if (priv->hw->mac->rx_queue_enable) - stmmac_mac_enable_rx_queues(priv); + stmmac_mac_enable_rx_queues(priv); /* Set RX 
priorities */ - if (rx_queues_count > 1 && priv->hw->mac->rx_queue_prio) + if (rx_queues_count > 1) stmmac_mac_config_rx_queues_prio(priv); /* Set TX priorities */ - if (tx_queues_count > 1 && priv->hw->mac->tx_queue_prio) + if (tx_queues_count > 1) stmmac_mac_config_tx_queues_prio(priv); /* Set RX routing */ - if (rx_queues_count > 1 && priv->hw->mac->rx_queue_routing) + if (rx_queues_count > 1) stmmac_mac_config_rx_queues_routing(priv); } static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) { - if (priv->hw->mac->safety_feat_config && priv->dma_cap.asp) { + if (priv->dma_cap.asp) { netdev_info(priv->dev, "Enabling Safety Features\n"); - priv->hw->mac->safety_feat_config(priv->ioaddr, - priv->dma_cap.asp); + stmmac_safety_feat_config(priv, priv->ioaddr, priv->dma_cap.asp); } else { netdev_info(priv->dev, "No Safety Features support found\n"); } @@ -2559,7 +2495,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) } /* Copy the MAC addr into the HW */ - priv->hw->mac->set_umac_addr(priv->hw, dev->dev_addr, 0); + stmmac_set_umac_addr(priv, priv->hw, dev->dev_addr, 0); /* PS and related bits will be programmed according to the speed */ if (priv->hw->pcs) { @@ -2575,7 +2511,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) } /* Initialize the MAC Core */ - priv->hw->mac->core_init(priv->hw, dev); + stmmac_core_init(priv, priv->hw, dev); /* Initialize MTL*/ if (priv->synopsys_id >= DWMAC_CORE_4_00) @@ -2585,7 +2521,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) if (priv->synopsys_id >= DWMAC_CORE_5_10) stmmac_safety_feat_configuration(priv); - ret = priv->hw->mac->rx_ipc(priv->hw); + ret = stmmac_rx_ipc(priv, priv->hw); if (!ret) { netdev_warn(priv->dev, "RX IPC Checksum Offload disabled\n"); priv->plat->rx_coe = STMMAC_RX_COE_NONE; @@ -2593,7 +2529,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) } /* Enable the MAC Rx/Tx */ - priv->hw->mac->set_mac(priv->ioaddr, true); + stmmac_mac_set(priv, priv->ioaddr, true); /* Set the HW DMA mode and the COE */ stmmac_dma_operation_mode(priv); @@ -2623,13 +2559,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; - if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) { - priv->rx_riwt = MAX_DMA_RIWT; - priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT, rx_cnt); + if (priv->use_riwt) { + ret = stmmac_rx_watchdog(priv, priv->ioaddr, MAX_DMA_RIWT, rx_cnt); + if (!ret) + priv->rx_riwt = MAX_DMA_RIWT; } - if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane) - priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0); + if (priv->hw->pcs) + stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0); /* set TX and RX rings length */ stmmac_set_rings_length(priv); @@ -2637,7 +2574,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) /* Enable TSO */ if (priv->tso) { for (chan = 0; chan < tx_cnt; chan++) - priv->hw->dma->enable_tso(priv->ioaddr, 1, chan); + stmmac_enable_tso(priv, priv->ioaddr, 1, chan); } return 0; @@ -2808,7 +2745,7 @@ static int stmmac_release(struct net_device *dev) free_dma_desc_resources(priv); /* Disable the MAC Rx/Tx */ - priv->hw->mac->set_mac(priv->ioaddr, false); + stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(dev); @@ -2851,10 +2788,10 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des, buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ? 
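/* each descriptor in a TSO chain carries at most TSO_MAX_BUFF_SIZE bytes */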
TSO_MAX_BUFF_SIZE : tmp_len; - priv->hw->desc->prepare_tso_tx_desc(desc, 0, buff_size, - 0, 1, - (last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE), - 0, 0); + stmmac_prepare_tso_tx_desc(priv, desc, 0, buff_size, + 0, 1, + (last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE), + 0, 0); tmp_len -= TSO_MAX_BUFF_SIZE; } @@ -2926,7 +2863,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* set new MSS value if needed */ if (mss != tx_q->mss) { mss_desc = tx_q->dma_tx + tx_q->cur_tx; - priv->hw->desc->set_mss(mss_desc, mss); + stmmac_set_mss(priv, mss_desc, mss); tx_q->mss = mss; tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); @@ -3012,7 +2949,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) STMMAC_COAL_TIMER(priv->tx_coal_timer)); } else { priv->tx_count_frames = 0; - priv->hw->desc->set_tx_ic(desc); + stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; } @@ -3022,11 +2959,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) priv->hwts_tx_en)) { /* declare that device is doing timestamping */ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - priv->hw->desc->enable_tx_timestamp(first); + stmmac_enable_tx_timestamp(priv, first); } /* Complete the first descriptor before granting the DMA */ - priv->hw->desc->prepare_tso_tx_desc(first, 1, + stmmac_prepare_tso_tx_desc(priv, first, 1, proto_hdr_len, pay_len, 1, tx_q->tx_skbuff_dma[first_entry].last_segment, @@ -3040,7 +2977,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) * sure that MSS's own bit is the last thing written. */ dma_wmb(); - priv->hw->desc->set_tx_owner(mss_desc); + stmmac_set_tx_owner(priv, mss_desc); } /* The own bit must be the latest setting done when prepare the @@ -3054,8 +2991,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry, tx_q->cur_tx, first, nfrags); - priv->hw->desc->display_ring((void *)tx_q->dma_tx, DMA_TX_SIZE, - 0); + stmmac_display_ring(priv, (void *)tx_q->dma_tx, DMA_TX_SIZE, 0); pr_info(">>> frame to be transmitted: "); print_pkt(skb->data, skb_headlen(skb)); @@ -3063,8 +2999,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); - priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr, - queue); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); return NETDEV_TX_OK; @@ -3136,11 +3071,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) enh_desc = priv->plat->enh_desc; /* To program the descriptors according to the size of the frame */ if (enh_desc) - is_jumbo = priv->hw->mode->is_jumbo_frm(skb->len, enh_desc); + is_jumbo = stmmac_is_jumbo_frm(priv, skb->len, enh_desc); if (unlikely(is_jumbo) && likely(priv->synopsys_id < DWMAC_CORE_4_00)) { - entry = priv->hw->mode->jumbo_frm(tx_q, skb, csum_insertion); + entry = stmmac_jumbo_frm(priv, tx_q, skb, csum_insertion); if (unlikely(entry < 0)) goto dma_map_err; } @@ -3174,9 +3109,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff_dma[entry].last_segment = last_segment; /* Prepare the descriptor and set the own bit too */ - priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion, - priv->mode, 1, last_segment, - skb->len); + stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion, + 
priv->mode, 1, last_segment, skb->len); } /* Only the last descriptor gets to point to the skb. */ @@ -3203,7 +3137,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) else tx_head = (void *)tx_q->dma_tx; - priv->hw->desc->display_ring(tx_head, DMA_TX_SIZE, false); + stmmac_display_ring(priv, tx_head, DMA_TX_SIZE, false); netdev_dbg(priv->dev, ">>> frame to be transmitted: "); print_pkt(skb->data, skb->len); @@ -3228,7 +3162,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) STMMAC_COAL_TIMER(priv->tx_coal_timer)); } else { priv->tx_count_frames = 0; - priv->hw->desc->set_tx_ic(desc); + stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; } @@ -3259,13 +3193,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) priv->hwts_tx_en)) { /* declare that device is doing timestamping */ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - priv->hw->desc->enable_tx_timestamp(first); + stmmac_enable_tx_timestamp(priv, first); } /* Prepare the first descriptor setting the OWN bit too */ - priv->hw->desc->prepare_tx_desc(first, 1, nopaged_len, - csum_insertion, priv->mode, 1, - last_segment, skb->len); + stmmac_prepare_tx_desc(priv, first, 1, nopaged_len, + csum_insertion, priv->mode, 1, last_segment, + skb->len); /* The own bit must be the latest setting done when prepare the * descriptor and then barrier is needed to make sure that @@ -3277,10 +3211,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); if (priv->synopsys_id < DWMAC_CORE_4_00) - priv->hw->dma->enable_dma_transmission(priv->ioaddr); + stmmac_enable_dma_transmission(priv, priv->ioaddr); else - priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr, - queue); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, + queue); return NETDEV_TX_OK; @@ -3370,8 +3304,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) } else { p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]); } - if (priv->hw->mode->refill_desc3) - priv->hw->mode->refill_desc3(rx_q, p); + + stmmac_refill_desc3(priv, rx_q, p); if (rx_q->rx_zeroc_thresh > 0) rx_q->rx_zeroc_thresh--; @@ -3382,9 +3316,9 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) dma_wmb(); if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) - priv->hw->desc->init_rx_desc(p, priv->use_riwt, 0, 0); + stmmac_init_rx_desc(priv, p, priv->use_riwt, 0, 0); else - priv->hw->desc->set_rx_owner(p); + stmmac_set_rx_owner(priv, p); dma_wmb(); @@ -3418,7 +3352,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) else rx_head = (void *)rx_q->dma_rx; - priv->hw->desc->display_ring(rx_head, DMA_RX_SIZE, true); + stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true); } while (count < limit) { int status; @@ -3431,8 +3365,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) p = rx_q->dma_rx + entry; /* read the status of the incoming frame */ - status = priv->hw->desc->rx_status(&priv->dev->stats, - &priv->xstats, p); + status = stmmac_rx_status(priv, &priv->dev->stats, + &priv->xstats, p); /* check if managed by the DMA otherwise go ahead */ if (unlikely(status & dma_own)) break; @@ -3449,11 +3383,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) prefetch(np); - if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status)) - priv->hw->desc->rx_extended_status(&priv->dev->stats, - &priv->xstats, - 
rx_q->dma_erx + - entry); + if (priv->extend_desc) + stmmac_rx_extended_status(priv, &priv->dev->stats, + &priv->xstats, rx_q->dma_erx + entry); if (unlikely(status == discard_frame)) { priv->dev->stats.rx_errors++; if (priv->hwts_rx_en && !priv->extend_desc) { @@ -3479,7 +3411,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) else des = le32_to_cpu(p->des2); - frame_len = priv->hw->desc->get_rx_frame_len(p, coe); + frame_len = stmmac_get_rx_frame_len(priv, p, coe); /* If frame length is greater than skb buffer size * (preallocated during init) then the packet is @@ -3621,7 +3553,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget) work_done = stmmac_rx(priv, budget, rx_q->queue_index); if (work_done < budget) { napi_complete_done(napi, work_done); - stmmac_enable_dma_irq(priv, chan); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan); } return work_done; } @@ -3654,7 +3586,7 @@ static void stmmac_set_rx_mode(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - priv->hw->mac->set_filter(priv->hw, dev); + stmmac_set_filter(priv, priv->hw, dev); } /** @@ -3727,7 +3659,7 @@ static int stmmac_set_features(struct net_device *netdev, /* No check needed because rx_coe has been set before and it will be * fixed in case of issue. */ - priv->hw->mac->rx_ipc(priv->hw); + stmmac_rx_ipc(priv, priv->hw); return 0; } @@ -3771,8 +3703,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) /* To handle GMAC own interrupts */ if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) { - int status = priv->hw->mac->host_irq_status(priv->hw, - &priv->xstats); + int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats); if (unlikely(status)) { /* For LPI we need to save the tx status */ @@ -3787,15 +3718,14 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - status |= - priv->hw->mac->host_mtl_irq_status(priv->hw, - queue); + status |= stmmac_host_mtl_irq_status(priv, + priv->hw, queue); - if (status & CORE_IRQ_MTL_RX_OVERFLOW && - priv->hw->dma->set_rx_tail_ptr) - priv->hw->dma->set_rx_tail_ptr(priv->ioaddr, - rx_q->rx_tail_addr, - queue); + if (status & CORE_IRQ_MTL_RX_OVERFLOW) + stmmac_set_rx_tail_ptr(priv, + priv->ioaddr, + rx_q->rx_tail_addr, + queue); } } @@ -3869,7 +3799,7 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr) if (ret) return ret; - priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0); + stmmac_set_umac_addr(priv, priv->hw, ndev->dev_addr, 0); return ret; } @@ -4145,49 +4075,17 @@ static void stmmac_service_task(struct work_struct *work) */ static int stmmac_hw_init(struct stmmac_priv *priv) { - struct mac_device_info *mac; - - /* Identify the MAC HW device */ - if (priv->plat->setup) { - mac = priv->plat->setup(priv); - } else if (priv->plat->has_gmac) { - priv->dev->priv_flags |= IFF_UNICAST_FLT; - mac = dwmac1000_setup(priv->ioaddr, - priv->plat->multicast_filter_bins, - priv->plat->unicast_filter_entries, - &priv->synopsys_id); - } else if (priv->plat->has_gmac4) { - priv->dev->priv_flags |= IFF_UNICAST_FLT; - mac = dwmac4_setup(priv->ioaddr, - priv->plat->multicast_filter_bins, - priv->plat->unicast_filter_entries, - &priv->synopsys_id); - } else { - mac = dwmac100_setup(priv->ioaddr, &priv->synopsys_id); - } - if (!mac) - return -ENOMEM; - - priv->hw = mac; + int ret; /* dwmac-sun8i only works in chain mode */ if (priv->plat->has_sun8i) chain_mode = 1; + priv->chain_mode = chain_mode; - /* To use the chained or ring mode
*/ - if (priv->synopsys_id >= DWMAC_CORE_4_00) { - priv->hw->mode = &dwmac4_ring_mode_ops; - } else { - if (chain_mode) { - priv->hw->mode = &chain_mode_ops; - dev_info(priv->device, "Chain mode enabled\n"); - priv->mode = STMMAC_CHAIN_MODE; - } else { - priv->hw->mode = &ring_mode_ops; - dev_info(priv->device, "Ring mode enabled\n"); - priv->mode = STMMAC_RING_MODE; - } - } + /* Initialize HW Interface */ + ret = stmmac_hwif_init(priv); + if (ret) + return ret; /* Get the HW capability (new GMAC newer than 3.50a) */ priv->hw_cap_support = stmmac_get_hw_features(priv); @@ -4221,12 +4119,6 @@ static int stmmac_hw_init(struct stmmac_priv *priv) dev_info(priv->device, "No HW DMA feature register supported\n"); } - /* To use alternate (extended), normal or GMAC4 descriptor structures */ - if (priv->synopsys_id >= DWMAC_CORE_4_00) - priv->hw->desc = &dwmac4_desc_ops; - else - stmmac_selec_desc_mode(priv); - if (priv->plat->rx_coe) { priv->hw->rx_csum = priv->plat->rx_coe; dev_info(priv->device, "RX Checksum Offload Engine supported\n"); @@ -4458,7 +4350,7 @@ int stmmac_dvr_remove(struct device *dev) stmmac_stop_all_dma(priv); - priv->hw->mac->set_mac(priv->ioaddr, false); + stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); unregister_netdev(ndev); if (priv->plat->stmmac_rst) @@ -4507,10 +4399,10 @@ int stmmac_suspend(struct device *dev) /* Enable Power down mode by programming the PMT regs */ if (device_may_wakeup(priv->device)) { - priv->hw->mac->pmt(priv->hw, priv->wolopts); + stmmac_pmt(priv, priv->hw, priv->wolopts); priv->irq_wake = 1; } else { - priv->hw->mac->set_mac(priv->ioaddr, false); + stmmac_mac_set(priv, priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); /* Disable clock in case of PWM is off */ clk_disable(priv->plat->pclk); @@ -4574,7 +4466,7 @@ int stmmac_resume(struct device *dev) */ if (device_may_wakeup(priv->device)) { spin_lock_irqsave(&priv->lock, flags); - priv->hw->mac->pmt(priv->hw, 0); + stmmac_pmt(priv, priv->hw, 0); spin_unlock_irqrestore(&priv->lock, flags); priv->irq_wake = 0; } else { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index e471a903c654..7d3a5c7f5db6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -49,9 +49,7 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb) addend = neg_adj ? 
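/* a negative ppb shrinks the addend and so slows the synthesized PTP clock */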
(addend - diff) : (addend + diff); spin_lock_irqsave(&priv->ptp_lock, flags); - - priv->hw->ptp->config_addend(priv->ptpaddr, addend); - + stmmac_config_addend(priv, priv->ptpaddr, addend); spin_unlock_irqrestore(&priv->ptp_lock, flags); return 0; @@ -84,10 +82,8 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) nsec = reminder; spin_lock_irqsave(&priv->ptp_lock, flags); - - priv->hw->ptp->adjust_systime(priv->ptpaddr, sec, nsec, neg_adj, - priv->plat->has_gmac4); - + stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj, + priv->plat->has_gmac4); spin_unlock_irqrestore(&priv->ptp_lock, flags); return 0; @@ -110,9 +106,7 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts) u64 ns; spin_lock_irqsave(&priv->ptp_lock, flags); - - ns = priv->hw->ptp->get_systime(priv->ptpaddr); - + stmmac_get_systime(priv, priv->ptpaddr, &ns); spin_unlock_irqrestore(&priv->ptp_lock, flags); *ts = ns_to_timespec64(ns); @@ -137,9 +131,7 @@ static int stmmac_set_time(struct ptp_clock_info *ptp, unsigned long flags; spin_lock_irqsave(&priv->ptp_lock, flags); - - priv->hw->ptp->init_systime(priv->ptpaddr, ts->tv_sec, ts->tv_nsec); - + stmmac_init_systime(priv, priv->ptpaddr, ts->tv_sec, ts->tv_nsec); spin_unlock_irqrestore(&priv->ptp_lock, flags); return 0; diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h index 8900a6fad318..c4ffdf47bad5 100644 --- a/drivers/net/ethernet/ti/netcp.h +++ b/drivers/net/ethernet/ti/netcp.h @@ -33,6 +33,8 @@ #define SGMII_LINK_MAC_MAC_FORCED 2 #define SGMII_LINK_MAC_FIBER 3 #define SGMII_LINK_MAC_PHY_NO_MDIO 4 +#define RGMII_LINK_MAC_PHY 5 +#define RGMII_LINK_MAC_PHY_NO_MDIO 7 #define XGMII_LINK_MAC_PHY 10 #define XGMII_LINK_MAC_MAC_FORCED 11 @@ -212,6 +214,7 @@ struct netcp_module { int (*add_vid)(void *intf_priv, int vid); int (*del_vid)(void *intf_priv, int vid); int (*ioctl)(void *intf_priv, struct ifreq *req, int cmd); + int (*set_rx_mode)(void *intf_priv, bool promisc); /* used internally */ struct list_head module_list; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index f5a7eb22d0f5..e40aa3e31af2 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1509,6 +1509,24 @@ static void netcp_addr_sweep_add(struct netcp_intf *netcp) } } +static int netcp_set_promiscuous(struct netcp_intf *netcp, bool promisc) +{ + struct netcp_intf_modpriv *priv; + struct netcp_module *module; + int error; + + for_each_module(netcp, priv) { + module = priv->netcp_module; + if (!module->set_rx_mode) + continue; + + error = module->set_rx_mode(priv->module_priv, promisc); + if (error) + return error; + } + return 0; +} + static void netcp_set_rx_mode(struct net_device *ndev) { struct netcp_intf *netcp = netdev_priv(ndev); @@ -1538,6 +1556,7 @@ static void netcp_set_rx_mode(struct net_device *ndev) /* finally sweep and callout into modules */ netcp_addr_sweep_del(netcp); netcp_addr_sweep_add(netcp); + netcp_set_promiscuous(netcp, promisc); spin_unlock(&netcp->lock); } @@ -2155,8 +2174,13 @@ static int netcp_probe(struct platform_device *pdev) struct device_node *child, *interfaces; struct netcp_device *netcp_device; struct device *dev = &pdev->dev; + struct netcp_module *module; int ret; + if (!knav_dma_device_ready() || + !knav_qmss_device_ready()) + return -EPROBE_DEFER; + if (!node) { dev_err(dev, "could not find device info\n"); return -ENODEV; @@ -2203,6 +2227,14 @@ static int netcp_probe(struct platform_device *pdev) /* Add the 
device instance to the list */ list_add_tail(&netcp_device->device_list, &netcp_devices); + /* Probe & attach any modules already registered */ + mutex_lock(&netcp_modules_lock); + for_each_netcp_module(module) { + ret = netcp_module_probe(netcp_device, module); + if (ret < 0) + dev_err(dev, "module(%s) probe failed\n", module->name); + } + mutex_unlock(&netcp_modules_lock); return 0; probe_quit_interface: diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 56dbc0b9fedc..6a728d35e776 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -21,6 +21,7 @@ #include <linux/io.h> #include <linux/module.h> #include <linux/of_mdio.h> +#include <linux/of_net.h> #include <linux/of_address.h> #include <linux/if_vlan.h> #include <linux/ptp_classify.h> @@ -42,7 +43,7 @@ /* 1G Ethernet SS defines */ #define GBE_MODULE_NAME "netcp-gbe" -#define GBE_SS_VERSION_14 0x4ed21104 +#define GBE_SS_VERSION_14 0x4ed2 #define GBE_SS_REG_INDEX 0 #define GBE_SGMII34_REG_INDEX 1 @@ -72,6 +73,11 @@ #define IS_SS_ID_NU(d) \ (GBE_IDENT((d)->ss_version) == GBE_SS_ID_NU) +#define IS_SS_ID_VER_14(d) \ + (GBE_IDENT((d)->ss_version) == GBE_SS_VERSION_14) +#define IS_SS_ID_2U(d) \ + (GBE_IDENT((d)->ss_version) == GBE_SS_ID_2U) + #define GBENU_SS_REG_INDEX 0 #define GBENU_SM_REG_INDEX 1 #define GBENU_SGMII_MODULE_OFFSET 0x100 @@ -86,7 +92,7 @@ /* 10G Ethernet SS defines */ #define XGBE_MODULE_NAME "netcp-xgbe" -#define XGBE_SS_VERSION_10 0x4ee42100 +#define XGBE_SS_VERSION_10 0x4ee4 #define XGBE_SS_REG_INDEX 0 #define XGBE_SM_REG_INDEX 1 @@ -166,6 +172,11 @@ #define GBE_RXHOOK_ORDER 0 #define GBE_DEFAULT_ALE_AGEOUT 30 #define SLAVE_LINK_IS_XGMII(s) ((s)->link_interface >= XGMII_LINK_MAC_PHY) +#define SLAVE_LINK_IS_RGMII(s) \ + (((s)->link_interface >= RGMII_LINK_MAC_PHY) && \ + ((s)->link_interface <= RGMII_LINK_MAC_PHY_NO_MDIO)) +#define SLAVE_LINK_IS_SGMII(s) \ + ((s)->link_interface <= SGMII_LINK_MAC_PHY_NO_MDIO) #define NETCP_LINK_STATE_INVALID -1 #define GBE_SET_REG_OFS(p, rb, rn) p->rb##_ofs.rn = \ @@ -549,6 +560,7 @@ struct gbe_ss_regs { struct gbe_ss_regs_ofs { u16 id_ver; u16 control; + u16 rgmii_status; /* 2U */ }; struct gbe_switch_regs { @@ -591,6 +603,7 @@ struct gbe_port_regs { struct gbe_port_regs_ofs { u16 port_vlan; u16 tx_pri_map; + u16 rx_pri_map; u16 sa_lo; u16 sa_hi; u16 ts_ctl; @@ -695,6 +708,7 @@ struct gbe_slave { u32 link_interface; u32 mac_control; u8 phy_port_t; + struct device_node *node; struct device_node *phy_node; struct ts_ctl ts_ctl; struct list_head slave_list; @@ -1915,7 +1929,7 @@ static void keystone_get_ethtool_stats(struct net_device *ndev, gbe_dev = gbe_intf->gbe_dev; spin_lock_bh(&gbe_dev->hw_stats_lock); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) gbe_update_stats_ver14(gbe_dev, data); else gbe_update_stats(gbe_dev, data); @@ -2091,8 +2105,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev, ALE_PORT_STATE_FORWARD); if (ndev && slave->open && - slave->link_interface != SGMII_LINK_MAC_PHY && - slave->link_interface != XGMII_LINK_MAC_PHY) + ((slave->link_interface != SGMII_LINK_MAC_PHY) && + (slave->link_interface != RGMII_LINK_MAC_PHY) && + (slave->link_interface != XGMII_LINK_MAC_PHY))) netif_carrier_on(ndev); } else { writel(mac_control, GBE_REG_ADDR(slave, emac_regs, @@ -2101,8 +2116,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev, ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); if (ndev && - slave->link_interface != 
SGMII_LINK_MAC_PHY && - slave->link_interface != XGMII_LINK_MAC_PHY) + ((slave->link_interface != SGMII_LINK_MAC_PHY) && + (slave->link_interface != RGMII_LINK_MAC_PHY) && + (slave->link_interface != XGMII_LINK_MAC_PHY))) netif_carrier_off(ndev); } @@ -2115,23 +2131,35 @@ static bool gbe_phy_link_status(struct gbe_slave *slave) return !slave->phy || slave->phy->link; } +#define RGMII_REG_STATUS_LINK BIT(0) + +static void netcp_2u_rgmii_get_port_link(struct gbe_priv *gbe_dev, bool *status) +{ + u32 val = 0; + + val = readl(GBE_REG_ADDR(gbe_dev, ss_regs, rgmii_status)); + *status = !!(val & RGMII_REG_STATUS_LINK); +} + static void netcp_ethss_update_link_state(struct gbe_priv *gbe_dev, struct gbe_slave *slave, struct net_device *ndev) { - int sp = slave->slave_num; - int phy_link_state, sgmii_link_state = 1, link_state; + bool sw_link_state = true, phy_link_state; + int sp = slave->slave_num, link_state; if (!slave->open) return; - if (!SLAVE_LINK_IS_XGMII(slave)) { - sgmii_link_state = - netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp); - } + if (SLAVE_LINK_IS_RGMII(slave)) + netcp_2u_rgmii_get_port_link(gbe_dev, + &sw_link_state); + if (SLAVE_LINK_IS_SGMII(slave)) + sw_link_state = + netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp); phy_link_state = gbe_phy_link_status(slave); - link_state = phy_link_state & sgmii_link_state; + link_state = phy_link_state & sw_link_state; if (atomic_xchg(&slave->link_state, link_state) != link_state) netcp_ethss_link_state_action(gbe_dev, ndev, slave, @@ -2205,7 +2233,7 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave, max_rx_len = NETCP_MAX_FRAME_SIZE; /* Enable correct MII mode at SS level */ - if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) && + if (IS_SS_ID_XGBE(gbe_dev) && (slave->link_interface >= XGMII_LINK_MAC_PHY)) { xgmii_mode = readl(GBE_REG_ADDR(gbe_dev, ss_regs, control)); xgmii_mode |= (1 << slave->slave_num); @@ -2236,7 +2264,8 @@ static void gbe_slave_stop(struct gbe_intf *intf) struct gbe_priv *gbe_dev = intf->gbe_dev; struct gbe_slave *slave = intf->slave; - gbe_sgmii_rtreset(gbe_dev, slave, true); + if (!IS_SS_ID_2U(gbe_dev)) + gbe_sgmii_rtreset(gbe_dev, slave, true); gbe_port_reset(slave); /* Disable forwarding */ cpsw_ale_control_set(gbe_dev->ale, slave->port_num, @@ -2271,11 +2300,20 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) void (*hndlr)(struct net_device *) = gbe_adjust_link; - gbe_sgmii_config(priv, slave); + if (!IS_SS_ID_2U(priv)) + gbe_sgmii_config(priv, slave); gbe_port_reset(slave); - gbe_sgmii_rtreset(priv, slave, false); + if (!IS_SS_ID_2U(priv)) + gbe_sgmii_rtreset(priv, slave, false); gbe_port_config(priv, slave, priv->rx_packet_max); gbe_set_slave_mac(slave, gbe_intf); + /* For NU & 2U switch, map the vlan priorities to zero + * as we only configure to use priority 0 + */ + if (IS_SS_ID_MU(priv)) + writel(HOST_TX_PRI_MAP_DEFAULT, + GBE_REG_ADDR(slave, port_regs, rx_pri_map)); + /* enable forwarding */ cpsw_ale_control_set(priv->ale, slave->port_num, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); @@ -2286,6 +2324,21 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) has_phy = true; phy_mode = PHY_INTERFACE_MODE_SGMII; slave->phy_port_t = PORT_MII; + } else if (slave->link_interface == RGMII_LINK_MAC_PHY) { + has_phy = true; + phy_mode = of_get_phy_mode(slave->node); + /* if phy-mode is not present, default to + * PHY_INTERFACE_MODE_RGMII + */ + if (phy_mode < 0) + phy_mode = PHY_INTERFACE_MODE_RGMII; + + if (!phy_interface_mode_is_rgmii(phy_mode)) { + 
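/* only the rgmii, rgmii-id, rgmii-rxid and rgmii-txid variants are usable here */ +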
dev_err(priv->dev, + "Unsupported phy mode %d\n", phy_mode); + return -EINVAL; + } + slave->phy_port_t = PORT_MII; } else if (slave->link_interface == XGMII_LINK_MAC_PHY) { has_phy = true; phy_mode = PHY_INTERFACE_MODE_NA; @@ -2293,7 +2346,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) } if (has_phy) { - if (priv->ss_version == XGBE_SS_VERSION_10) + if (IS_SS_ID_XGBE(priv)) hndlr = xgbe_adjust_link; slave->phy = of_phy_connect(gbe_intf->ndev, @@ -2722,6 +2775,61 @@ static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *req) } #endif /* CONFIG_TI_CPTS */ +static int gbe_set_rx_mode(void *intf_priv, bool promisc) +{ + struct gbe_intf *gbe_intf = intf_priv; + struct gbe_priv *gbe_dev = gbe_intf->gbe_dev; + struct cpsw_ale *ale = gbe_dev->ale; + unsigned long timeout; + int i, ret = -ETIMEDOUT; + + /* Disable(1)/Enable(0) Learn for all ports (host is port 0 and + * slaves are port 1 and up + */ + for (i = 0; i <= gbe_dev->num_slaves; i++) { + cpsw_ale_control_set(ale, i, + ALE_PORT_NOLEARN, !!promisc); + cpsw_ale_control_set(ale, i, + ALE_PORT_NO_SA_UPDATE, !!promisc); + } + + if (!promisc) { + /* Don't Flood All Unicast Packets to Host port */ + cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0); + dev_vdbg(gbe_dev->dev, "promiscuous mode disabled\n"); + return 0; + } + + timeout = jiffies + HZ; + + /* Clear All Untouched entries */ + cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1); + do { + cpu_relax(); + if (cpsw_ale_control_get(ale, 0, ALE_AGEOUT)) { + ret = 0; + break; + } + + } while (time_after(timeout, jiffies)); + + /* Make sure it is not a false timeout */ + if (ret && !cpsw_ale_control_get(ale, 0, ALE_AGEOUT)) + return ret; + + cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1); + + /* Clear all mcast from ALE */ + cpsw_ale_flush_multicast(ale, + GBE_PORT_MASK(gbe_dev->ale_ports), + -1); + + /* Flood All Unicast Packets to Host port */ + cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1); + dev_vdbg(gbe_dev->dev, "promiscuous mode enabled\n"); + return ret; +} + static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd) { struct gbe_intf *gbe_intf = intf_priv; @@ -2764,7 +2872,7 @@ static void netcp_ethss_timer(struct timer_list *t) /* A timer runs as a BH, no need to block them */ spin_lock(&gbe_dev->hw_stats_lock); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) gbe_update_stats_ver14(gbe_dev, NULL); else gbe_update_stats(gbe_dev, NULL); @@ -2807,7 +2915,7 @@ static int gbe_open(void *intf_priv, struct net_device *ndev) GBE_RTL_VERSION(reg), GBE_IDENT(reg)); /* For 10G and on NetCP 1.5, use directed to port */ - if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) || IS_SS_ID_MU(gbe_dev)) + if (IS_SS_ID_XGBE(gbe_dev) || IS_SS_ID_MU(gbe_dev)) gbe_intf->tx_pipe.flags = SWITCH_TO_PORT_IN_TAGINFO; if (gbe_dev->enable_ale) @@ -2911,8 +3019,10 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, slave->link_interface = SGMII_LINK_MAC_PHY; } + slave->node = node; slave->open = false; if ((slave->link_interface == SGMII_LINK_MAC_PHY) || + (slave->link_interface == RGMII_LINK_MAC_PHY) || (slave->link_interface == XGMII_LINK_MAC_PHY)) slave->phy_node = of_parse_phandle(node, "phy-handle", 0); slave->port_num = gbe_get_slave_port(gbe_dev, slave->slave_num); @@ -2924,7 +3034,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, /* Emac regs memmap are contiguous but port regs are not */ port_reg_num = slave->slave_num; - if (gbe_dev->ss_version == GBE_SS_VERSION_14) { + if 
(IS_SS_ID_VER_14(gbe_dev)) { if (slave->slave_num > 1) { port_reg_ofs = GBE13_SLAVE_PORT2_OFFSET; port_reg_num -= 2; @@ -2939,7 +3049,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, emac_reg_ofs = GBENU_EMAC_OFFSET; port_reg_blk_sz = 0x1000; emac_reg_blk_sz = 0x1000; - } else if (gbe_dev->ss_version == XGBE_SS_VERSION_10) { + } else if (IS_SS_ID_XGBE(gbe_dev)) { port_reg_ofs = XGBE10_SLAVE_PORT_OFFSET; emac_reg_ofs = XGBE10_EMAC_OFFSET; port_reg_blk_sz = 0x30; @@ -2955,7 +3065,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, slave->emac_regs = gbe_dev->switch_regs + emac_reg_ofs + (emac_reg_blk_sz * slave->slave_num); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) { + if (IS_SS_ID_VER_14(gbe_dev)) { /* Initialize slave port register offsets */ GBE_SET_REG_OFS(slave, port_regs, port_vlan); GBE_SET_REG_OFS(slave, port_regs, tx_pri_map); @@ -2976,6 +3086,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, /* Initialize slave port register offsets */ GBENU_SET_REG_OFS(slave, port_regs, port_vlan); GBENU_SET_REG_OFS(slave, port_regs, tx_pri_map); + GBENU_SET_REG_OFS(slave, port_regs, rx_pri_map); GBENU_SET_REG_OFS(slave, port_regs, sa_lo); GBENU_SET_REG_OFS(slave, port_regs, sa_hi); GBENU_SET_REG_OFS(slave, port_regs, ts_ctl); @@ -2989,7 +3100,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, GBENU_SET_REG_OFS(slave, emac_regs, mac_control); GBENU_SET_REG_OFS(slave, emac_regs, soft_reset); - } else if (gbe_dev->ss_version == XGBE_SS_VERSION_10) { + } else if (IS_SS_ID_XGBE(gbe_dev)) { /* Initialize slave port register offsets */ XGBE_SET_REG_OFS(slave, port_regs, port_vlan); XGBE_SET_REG_OFS(slave, port_regs, tx_pri_map); @@ -3039,7 +3150,8 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, continue; } - gbe_sgmii_config(gbe_dev, slave); + if (!IS_SS_ID_2U(gbe_dev)) + gbe_sgmii_config(gbe_dev, slave); gbe_port_reset(slave); gbe_port_config(gbe_dev, slave, gbe_dev->rx_packet_max); list_add_tail(&slave->slave_list, &gbe_dev->secondary_slaves); @@ -3073,6 +3185,9 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, if (slave->link_interface == SGMII_LINK_MAC_PHY) { phy_mode = PHY_INTERFACE_MODE_SGMII; slave->phy_port_t = PORT_MII; + } else if (slave->link_interface == RGMII_LINK_MAC_PHY) { + phy_mode = PHY_INTERFACE_MODE_RGMII; + slave->phy_port_t = PORT_MII; } else { phy_mode = PHY_INTERFACE_MODE_NA; slave->phy_port_t = PORT_FIBRE; @@ -3080,6 +3195,7 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, for_each_sec_slave(slave, gbe_dev) { if ((slave->link_interface != SGMII_LINK_MAC_PHY) && + (slave->link_interface != RGMII_LINK_MAC_PHY) && (slave->link_interface != XGMII_LINK_MAC_PHY)) continue; slave->phy = @@ -3355,7 +3471,7 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, gbe_dev->num_stats_mods = gbe_dev->max_num_ports; gbe_dev->et_stats = gbenu_et_stats; - if (IS_SS_ID_NU(gbe_dev)) + if (IS_SS_ID_MU(gbe_dev)) gbe_dev->num_et_stats = GBENU_ET_STATS_HOST_SIZE + (gbe_dev->max_num_slaves * GBENU_ET_STATS_PORT_SIZE); else @@ -3396,7 +3512,9 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, } gbe_dev->switch_regs = regs; - gbe_dev->sgmii_port_regs = gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET; + if (!IS_SS_ID_2U(gbe_dev)) + gbe_dev->sgmii_port_regs = + gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET; /* Although sgmii modules are mem mapped to one contiguous * region on GBENU devices, setting sgmii_port34_regs allows @@ -3419,6 +3537,8 
@@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, /* Subsystem registers */ GBENU_SET_REG_OFS(gbe_dev, ss_regs, id_ver); + /* ok to set for MU, but used by 2U only */ + GBENU_SET_REG_OFS(gbe_dev, ss_regs, rgmii_status); /* Switch module registers */ GBENU_SET_REG_OFS(gbe_dev, switch_regs, id_ver); @@ -3464,6 +3584,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, gbe_dev->max_num_slaves = 8; } else if (of_device_is_compatible(node, "ti,netcp-gbe-2")) { gbe_dev->max_num_slaves = 1; + gbe_module.set_rx_mode = gbe_set_rx_mode; } else if (of_device_is_compatible(node, "ti,netcp-xgbe")) { gbe_dev->max_num_slaves = 2; } else { @@ -3508,7 +3629,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, dev_dbg(dev, "ss_version: 0x%08x\n", gbe_dev->ss_version); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) ret = set_gbe_ethss14_priv(gbe_dev, node); else if (IS_SS_ID_MU(gbe_dev)) ret = set_gbenu_ethss_priv(gbe_dev, node); @@ -3606,7 +3727,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, spin_lock_bh(&gbe_dev->hw_stats_lock); for (i = 0; i < gbe_dev->num_stats_mods; i++) { - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) gbe_reset_mod_stats_ver14(gbe_dev, i); else gbe_reset_mod_stats(gbe_dev, i); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index b919e89a9b93..750eaa53bf0c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -36,6 +36,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); #define GENEVE_VER 0 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) +#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN) +#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN) /* per-network namespace private data for this module */ struct geneve_net { @@ -826,8 +828,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, return PTR_ERR(rt); if (skb_dst(skb)) { - int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - - GENEVE_BASE_HLEN - info->options_len - 14; + int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN - + info->options_len; skb_dst_update_pmtu(skb, mtu); } @@ -872,8 +874,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, return PTR_ERR(dst); if (skb_dst(skb)) { - int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - - GENEVE_BASE_HLEN - info->options_len - 14; + int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len; skb_dst_update_pmtu(skb, mtu); } @@ -941,11 +942,10 @@ tx_error: static int geneve_change_mtu(struct net_device *dev, int new_mtu) { - /* Only possible if called internally, ndo_change_mtu path's new_mtu - * is guaranteed to be between dev->min_mtu and dev->max_mtu. 
- */ if (new_mtu > dev->max_mtu) new_mtu = dev->max_mtu; + else if (new_mtu < dev->min_mtu) + new_mtu = dev->min_mtu; dev->mtu = new_mtu; return 0; @@ -1261,7 +1261,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], } if (data[IFLA_GENEVE_REMOTE6]) { - #if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (changelink && (ip_tunnel_info_af(info) == AF_INET)) { attrtype = IFLA_GENEVE_REMOTE6; goto change_notsup; @@ -1387,6 +1387,48 @@ change_notsup: return -EOPNOTSUPP; } +static void geneve_link_config(struct net_device *dev, + struct ip_tunnel_info *info, struct nlattr *tb[]) +{ + struct geneve_dev *geneve = netdev_priv(dev); + int ldev_mtu = 0; + + if (tb[IFLA_MTU]) { + geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + return; + } + + switch (ip_tunnel_info_af(info)) { + case AF_INET: { + struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst }; + struct rtable *rt = ip_route_output_key(geneve->net, &fl4); + + if (!IS_ERR(rt) && rt->dst.dev) { + ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; + ip_rt_put(rt); + } + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: { + struct rt6_info *rt = rt6_lookup(geneve->net, + &info->key.u.ipv6.dst, NULL, 0, + NULL, 0); + + if (rt && rt->dst.dev) + ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; + ip6_rt_put(rt); + break; + } +#endif + } + + if (ldev_mtu <= 0) + return; + + geneve_change_mtu(dev, ldev_mtu - info->options_len); +} + static int geneve_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1402,8 +1444,14 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (err) return err; - return geneve_configure(net, dev, extack, &info, metadata, - use_udp6_rx_checksums); + err = geneve_configure(net, dev, extack, &info, metadata, + use_udp6_rx_checksums); + if (err) + return err; + + geneve_link_config(dev, &info, tb); + + return 0; } /* Quiesces the geneve device data path for both TX and RX. @@ -1477,8 +1525,10 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], if (err) return err; - if (!geneve_dst_addr_equal(&geneve->info, &info)) + if (!geneve_dst_addr_equal(&geneve->info, &info)) { dst_cache_reset(&info.dst_cache); + geneve_link_config(dev, &info, tb); + } geneve_quiesce(geneve, &gs4, &gs6); geneve->info = info; diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index c180b480f8ef..13e4c1eff353 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -217,7 +217,7 @@ static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc, c = *s++; else if (len > 1) c = crc >> 8; - else if (len > 0) + else c = crc & 0xff; len--; diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig index 936968d23559..0765d5f61714 100644 --- a/drivers/net/hyperv/Kconfig +++ b/drivers/net/hyperv/Kconfig @@ -1,5 +1,6 @@ config HYPERV_NET tristate "Microsoft Hyper-V virtual network driver" depends on HYPERV + select UCS2_STRING help Select this option to enable the Hyper-V virtual network driver. 
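The "select UCS2_STRING" added above supports the RNDIS friendly-name handling introduced further below in rndis_filter.c: the host supplies the adapter name as a UCS-2 string, which has to be converted to UTF-8 before it can be installed as the interface alias. A minimal sketch of that conversion path, assuming the RNDIS query has already filled wname; the helper name set_alias_from_host_name is illustrative, not part of the driver:

	#include <linux/netdevice.h>
	#include <linux/string.h>
	#include <linux/ucs2_string.h>

	static void set_alias_from_host_name(struct net_device *net,
					     const ucs2_char_t *wname)
	{
		u8 ifalias[256];
		unsigned long len;

		/* re-encode the UCS-2 friendly name as UTF-8 */
		len = ucs2_as_utf8(ifalias, wname, sizeof(ifalias));

		/* the host's default name carries no information; skip it */
		if (strcmp(ifalias, "Network Adapter") != 0)
			dev_set_alias(net, ifalias, len);
	}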
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 960f06141472..6ebe39a3dde6 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -237,6 +237,8 @@ void netvsc_switch_datapath(struct net_device *nv_dev, bool vf); #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 +#define NVSP_PROTOCOL_VERSION_6 0x60000 +#define NVSP_PROTOCOL_VERSION_61 0x60001 enum { NVSP_MSG_TYPE_NONE = 0, @@ -308,6 +310,12 @@ enum { NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, + + /* Version 6 messages */ + NVSP_MSG6_TYPE_PD_API, + NVSP_MSG6_TYPE_PD_POST_BATCH, + + NVSP_MSG6_MAX = NVSP_MSG6_TYPE_PD_POST_BATCH }; enum { @@ -619,12 +627,168 @@ union nvsp_5_message_uber { struct nvsp_5_send_indirect_table send_table; } __packed; +enum nvsp_6_pd_api_op { + PD_API_OP_CONFIG = 1, + PD_API_OP_SW_DATAPATH, /* Switch Datapath */ + PD_API_OP_OPEN_PROVIDER, + PD_API_OP_CLOSE_PROVIDER, + PD_API_OP_CREATE_QUEUE, + PD_API_OP_FLUSH_QUEUE, + PD_API_OP_FREE_QUEUE, + PD_API_OP_ALLOC_COM_BUF, /* Allocate Common Buffer */ + PD_API_OP_FREE_COM_BUF, /* Free Common Buffer */ + PD_API_OP_MAX +}; + +struct grp_affinity { + u64 mask; + u16 grp; + u16 reserved[3]; +} __packed; + +struct nvsp_6_pd_api_req { + u32 op; + + union { + /* MMIO information is sent from the VM to VSP */ + struct __packed { + u64 mmio_pa; /* MMIO Physical Address */ + u32 mmio_len; + + /* Number of PD queues a VM can support */ + u16 num_subchn; + } config; + + /* Switch Datapath */ + struct __packed { + /* Host Datapath Is PacketDirect */ + u8 host_dpath_is_pd; + + /* Guest PacketDirect Is Enabled */ + u8 guest_pd_enabled; + } sw_dpath; + + /* Open Provider*/ + struct __packed { + u32 prov_id; /* Provider id */ + u32 flag; + } open_prov; + + /* Close Provider */ + struct __packed { + u32 prov_id; + } cls_prov; + + /* Create Queue*/ + struct __packed { + u32 prov_id; + u16 q_id; + u16 q_size; + u8 is_recv_q; + u8 is_rss_q; + u32 recv_data_len; + struct grp_affinity affy; + } cr_q; + + /* Delete Queue*/ + struct __packed { + u32 prov_id; + u16 q_id; + } del_q; + + /* Flush Queue */ + struct __packed { + u32 prov_id; + u16 q_id; + } flush_q; + + /* Allocate Common Buffer */ + struct __packed { + u32 len; + u32 pf_node; /* Preferred Node */ + u16 region_id; + } alloc_com_buf; + + /* Free Common Buffer */ + struct __packed { + u32 len; + u64 pa; /* Physical Address */ + u32 pf_node; /* Preferred Node */ + u16 region_id; + u8 cache_type; + } free_com_buf; + } __packed; +} __packed; + +struct nvsp_6_pd_api_comp { + u32 op; + u32 status; + + union { + struct __packed { + /* actual number of PD queues allocated to the VM */ + u16 num_pd_q; + + /* Num Receive Rss PD Queues */ + u8 num_rss_q; + + u8 is_supported; /* Is supported by VSP */ + u8 is_enabled; /* Is enabled by VSP */ + } config; + + /* Open Provider */ + struct __packed { + u32 prov_id; + } open_prov; + + /* Create Queue */ + struct __packed { + u32 prov_id; + u16 q_id; + u16 q_size; + u32 recv_data_len; + struct grp_affinity affy; + } cr_q; + + /* Allocate Common Buffer */ + struct __packed { + u64 pa; /* Physical Address */ + u32 len; + u32 pf_node; /* Preferred Node */ + u16 region_id; + u8 cache_type; + } alloc_com_buf; + } __packed; +} __packed; + +struct nvsp_6_pd_buf { + u32 region_offset; + u16 region_id; + u16 is_partial:1; + u16 reserved:15; +} __packed; + +struct nvsp_6_pd_batch_msg { + struct nvsp_message_header hdr; + u16 
count; + u16 guest2host:1; + u16 is_recv:1; + u16 reserved:14; + struct nvsp_6_pd_buf pd_buf[0]; +} __packed; + +union nvsp_6_message_uber { + struct nvsp_6_pd_api_req pd_req; + struct nvsp_6_pd_api_comp pd_comp; +} __packed; + union nvsp_all_messages { union nvsp_message_init_uber init_msg; union nvsp_1_message_uber v1_msg; union nvsp_2_message_uber v2_msg; union nvsp_4_message_uber v4_msg; union nvsp_5_message_uber v5_msg; + union nvsp_6_message_uber v6_msg; } __packed; /* ALL Messages */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 04f611e6f678..d2ee66c259a7 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -525,7 +525,8 @@ static int netvsc_connect_vsp(struct hv_device *device, struct net_device *ndev = hv_get_drvdata(device); static const u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, - NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 + NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5, + NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61 }; struct nvsp_message *init_packet; int ndis_version, i, ret; @@ -651,16 +652,14 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device, sync_change_bit(index, net_device->send_section_map); } -static void netvsc_send_tx_complete(struct netvsc_device *net_device, - struct vmbus_channel *incoming_channel, - struct hv_device *device, +static void netvsc_send_tx_complete(struct net_device *ndev, + struct netvsc_device *net_device, + struct vmbus_channel *channel, const struct vmpacket_descriptor *desc, int budget) { struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; - struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *ndev_ctx = netdev_priv(ndev); - struct vmbus_channel *channel = device->channel; u16 q_idx = 0; int queue_sends; @@ -674,7 +673,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, if (send_index != NETVSC_INVALID_INDEX) netvsc_free_send_slot(net_device, send_index); q_idx = packet->q_idx; - channel = incoming_channel; tx_stats = &net_device->chan_table[q_idx].tx_stats; @@ -704,14 +702,13 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, } } -static void netvsc_send_completion(struct netvsc_device *net_device, +static void netvsc_send_completion(struct net_device *ndev, + struct netvsc_device *net_device, struct vmbus_channel *incoming_channel, - struct hv_device *device, const struct vmpacket_descriptor *desc, int budget) { - struct nvsp_message *nvsp_packet = hv_pkt_data(desc); - struct net_device *ndev = hv_get_drvdata(device); + const struct nvsp_message *nvsp_packet = hv_pkt_data(desc); switch (nvsp_packet->hdr.msg_type) { case NVSP_MSG_TYPE_INIT_COMPLETE: @@ -725,8 +722,8 @@ static void netvsc_send_completion(struct netvsc_device *net_device, break; case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: - netvsc_send_tx_complete(net_device, incoming_channel, - device, desc, budget); + netvsc_send_tx_complete(ndev, net_device, incoming_channel, + desc, budget); break; default: @@ -1091,12 +1088,11 @@ static void enq_receive_complete(struct net_device *ndev, static int netvsc_receive(struct net_device *ndev, struct netvsc_device *net_device, - struct net_device_context *net_device_ctx, - struct hv_device *device, struct vmbus_channel *channel, const struct vmpacket_descriptor *desc, - struct nvsp_message *nvsp) + const struct nvsp_message *nvsp) { + struct net_device_context *net_device_ctx = netdev_priv(ndev); const struct vmtransfer_page_packet_header 
*vmxferpage_packet = container_of(desc, const struct vmtransfer_page_packet_header, d); u16 q_idx = channel->offermsg.offer.sub_channel_index; @@ -1157,13 +1153,12 @@ static int netvsc_receive(struct net_device *ndev, return count; } -static void netvsc_send_table(struct hv_device *hdev, - struct nvsp_message *nvmsg) +static void netvsc_send_table(struct net_device *ndev, + const struct nvsp_message *nvmsg) { - struct net_device *ndev = hv_get_drvdata(hdev); struct net_device_context *net_device_ctx = netdev_priv(ndev); - int i; u32 count, *tab; + int i; count = nvmsg->msg.v5_msg.send_table.count; if (count != VRSS_SEND_TAB_SIZE) { @@ -1178,24 +1173,25 @@ static void netvsc_send_table(struct hv_device *hdev, net_device_ctx->tx_table[i] = tab[i]; } -static void netvsc_send_vf(struct net_device_context *net_device_ctx, - struct nvsp_message *nvmsg) +static void netvsc_send_vf(struct net_device *ndev, + const struct nvsp_message *nvmsg) { + struct net_device_context *net_device_ctx = netdev_priv(ndev); + net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; } -static inline void netvsc_receive_inband(struct hv_device *hdev, - struct net_device_context *net_device_ctx, - struct nvsp_message *nvmsg) +static void netvsc_receive_inband(struct net_device *ndev, + const struct nvsp_message *nvmsg) { switch (nvmsg->hdr.msg_type) { case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: - netvsc_send_table(hdev, nvmsg); + netvsc_send_table(ndev, nvmsg); break; case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: - netvsc_send_vf(net_device_ctx, nvmsg); + netvsc_send_vf(ndev, nvmsg); break; } } @@ -1207,24 +1203,23 @@ static int netvsc_process_raw_pkt(struct hv_device *device, const struct vmpacket_descriptor *desc, int budget) { - struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct nvsp_message *nvmsg = hv_pkt_data(desc); + const struct nvsp_message *nvmsg = hv_pkt_data(desc); trace_nvsp_recv(ndev, channel, nvmsg); switch (desc->type) { case VM_PKT_COMP: - netvsc_send_completion(net_device, channel, device, + netvsc_send_completion(ndev, net_device, channel, desc, budget); break; case VM_PKT_DATA_USING_XFER_PAGES: - return netvsc_receive(ndev, net_device, net_device_ctx, - device, channel, desc, nvmsg); + return netvsc_receive(ndev, net_device, channel, + desc, nvmsg); break; case VM_PKT_DATA_INBAND: - netvsc_receive_inband(device, net_device_ctx, nvmsg); + netvsc_receive_inband(ndev, nvmsg); break; default: diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 6b127be781d9..3b6dbacaf77d 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -29,6 +29,7 @@ #include <linux/nls.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> +#include <linux/ucs2_string.h> #include "hyperv_net.h" #include "netvsc_trace.h" @@ -1223,6 +1224,29 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, return ret; } +static void rndis_get_friendly_name(struct net_device *net, + struct rndis_device *rndis_device, + struct netvsc_device *net_device) +{ + ucs2_char_t wname[256]; + unsigned long len; + u8 ifalias[256]; + u32 size; + + size = sizeof(wname); + if (rndis_filter_query_device(rndis_device, net_device, + RNDIS_OID_GEN_FRIENDLY_NAME, + wname, &size) != 0) + return; + + /* Convert Windows Unicode string to UTF-8 */ + len = ucs2_as_utf8(ifalias, wname, sizeof(ifalias)); + + /* ignore the default value from host */ + if (strcmp(ifalias, "Network Adapter") 
!= 0) + dev_set_alias(net, ifalias, len); +} + struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, struct netvsc_device_info *device_info) { @@ -1276,6 +1300,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); + /* Get friendly name as ifalias*/ + if (!net->ifalias) + rndis_get_friendly_name(net, rndis_device, net_device); + /* Query and set hardware capabilities */ ret = rndis_netdev_set_hwcaps(rndis_device, net_device); if (ret != 0) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 725f4b4afc6d..adde8fc45588 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -514,6 +514,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) const struct macvlan_dev *vlan = netdev_priv(dev); const struct macvlan_port *port = vlan->port; const struct macvlan_dev *dest; + void *accel_priv = NULL; if (vlan->mode == MACVLAN_MODE_BRIDGE) { const struct ethhdr *eth = (void *)skb->data; @@ -533,9 +534,14 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) } } + /* For packets that are non-multicast and not bridged we will pass + * the necessary information so that the lowerdev can distinguish + * the source of the packets via the accel_priv value. + */ + accel_priv = vlan->accel_priv; xmit_world: skb->dev = vlan->lowerdev; - return dev_queue_xmit(skb); + return dev_queue_xmit_accel(skb, accel_priv); } static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb) @@ -552,19 +558,14 @@ static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, str static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { + struct macvlan_dev *vlan = netdev_priv(dev); unsigned int len = skb->len; int ret; - struct macvlan_dev *vlan = netdev_priv(dev); if (unlikely(netpoll_tx_running(dev))) return macvlan_netpoll_send_skb(vlan, skb); - if (vlan->fwd_priv) { - skb->dev = vlan->lowerdev; - ret = dev_queue_xmit_accel(skb, vlan->fwd_priv); - } else { - ret = macvlan_queue_xmit(skb, dev); - } + ret = macvlan_queue_xmit(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vlan_pcpu_stats *pcpu_stats; @@ -613,26 +614,27 @@ static int macvlan_open(struct net_device *dev) goto hash_add; } - if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) { - vlan->fwd_priv = - lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev); - - /* If we get a NULL pointer back, or if we get an error - * then we should just fall through to the non accelerated path - */ - if (IS_ERR_OR_NULL(vlan->fwd_priv)) { - vlan->fwd_priv = NULL; - } else - return 0; - } - err = -EBUSY; if (macvlan_addr_busy(vlan->port, dev->dev_addr)) goto out; - err = dev_uc_add(lowerdev, dev->dev_addr); - if (err < 0) - goto out; + /* Attempt to populate accel_priv which is used to offload the L2 + * forwarding requests for unicast packets. + */ + if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) + vlan->accel_priv = + lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev); + + /* If earlier attempt to offload failed, or accel_priv is not + * populated we must add the unicast address to the lower device. 
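+ * (When the station was added successfully, the lower device itself steers unicast frames for this address, so no filter entry is needed there.)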
+ */ + if (IS_ERR_OR_NULL(vlan->accel_priv)) { + vlan->accel_priv = NULL; + err = dev_uc_add(lowerdev, dev->dev_addr); + if (err < 0) + goto out; + } + if (dev->flags & IFF_ALLMULTI) { err = dev_set_allmulti(lowerdev, 1); if (err < 0) @@ -653,13 +655,14 @@ clear_multi: if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(lowerdev, -1); del_unicast: - dev_uc_del(lowerdev, dev->dev_addr); -out: - if (vlan->fwd_priv) { + if (vlan->accel_priv) { lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev, - vlan->fwd_priv); - vlan->fwd_priv = NULL; + vlan->accel_priv); + vlan->accel_priv = NULL; + } else { + dev_uc_del(lowerdev, dev->dev_addr); } +out: return err; } @@ -668,11 +671,10 @@ static int macvlan_stop(struct net_device *dev) struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - if (vlan->fwd_priv) { + if (vlan->accel_priv) { lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev, - vlan->fwd_priv); - vlan->fwd_priv = NULL; - return 0; + vlan->accel_priv); + vlan->accel_priv = NULL; } dev_uc_unsync(lowerdev, dev); diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index bdfbabb86ee0..edb8b9ab827f 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -218,6 +218,12 @@ config AQUANTIA_PHY ---help--- Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 +config ASIX_PHY + tristate "Asix PHYs" + help + Currently supports the Asix Electronics PHY found in the X-Surf 100 + AX88796B package. + config AT803X_PHY tristate "AT803X PHYs" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 01acbcb2c798..701ca0b8717e 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -45,6 +45,7 @@ obj-y += $(sfp-obj-y) $(sfp-obj-m) obj-$(CONFIG_AMD_PHY) += amd.o obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o +obj-$(CONFIG_ASIX_PHY) += asix.o obj-$(CONFIG_AT803X_PHY) += at803x.o obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o diff --git a/drivers/net/phy/asix.c b/drivers/net/phy/asix.c new file mode 100644 index 000000000000..8ebe7f5484ae --- /dev/null +++ b/drivers/net/phy/asix.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Driver for Asix PHYs + * + * Author: Michael Schmitz <schmitzmic@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mii.h> +#include <linux/phy.h> + +#define PHY_ID_ASIX_AX88796B 0x003b1841 + +MODULE_DESCRIPTION("Asix PHY driver"); +MODULE_AUTHOR("Michael Schmitz <schmitzmic@gmail.com>"); +MODULE_LICENSE("GPL"); + +/** + * asix_soft_reset - software reset the PHY via BMCR_RESET bit + * @phydev: target phy_device struct + * + * Description: Perform a software PHY reset using the standard + * BMCR_RESET bit and poll for the reset bit to be cleared. + * Toggle BMCR_RESET bit off to accommodate broken AX8796B PHY implementation + * such as used on the Individual Computers' X-Surf 100 Zorro card. 
+ * + * Returns: 0 on success, < 0 on failure + */ +static int asix_soft_reset(struct phy_device *phydev) +{ + int ret; + + /* Asix PHY won't reset unless reset bit toggles */ + ret = phy_write(phydev, MII_BMCR, 0); + if (ret < 0) + return ret; + + return genphy_soft_reset(phydev); +} + +static struct phy_driver asix_driver[] = { { + .phy_id = PHY_ID_ASIX_AX88796B, + .name = "Asix Electronics AX88796B", + .phy_id_mask = 0xfffffff0, + .features = PHY_BASIC_FEATURES, + .soft_reset = asix_soft_reset, +} }; + +module_phy_driver(asix_driver); + +static struct mdio_device_id __maybe_unused asix_tbl[] = { + { PHY_ID_ASIX_AX88796B, 0xfffffff0 }, + { } +}; + +MODULE_DEVICE_TABLE(mdio, asix_tbl); diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index 5ad130c3da43..0876aec7328c 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -346,10 +346,6 @@ void bcm_phy_get_strings(struct phy_device *phydev, u8 *data) } EXPORT_SYMBOL_GPL(bcm_phy_get_strings); -#ifndef UINT64_MAX -#define UINT64_MAX (u64)(~((u64)0)) -#endif - /* Caller is supposed to provide appropriate storage for the library code to * access the shadow copy */ @@ -362,7 +358,7 @@ static u64 bcm_phy_get_stat(struct phy_device *phydev, u64 *shadow, val = phy_read(phydev, stat.reg); if (val < 0) { - ret = UINT64_MAX; + ret = U64_MAX; } else { val >>= stat.shift; val = val & ((1 << stat.bits) - 1); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 25e2a099b71c..b8f57e9b9379 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1482,9 +1482,6 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data) } } -#ifndef UINT64_MAX -#define UINT64_MAX (u64)(~((u64)0)) -#endif static u64 marvell_get_stat(struct phy_device *phydev, int i) { struct marvell_hw_stat stat = marvell_hw_stats[i]; @@ -1494,7 +1491,7 @@ static u64 marvell_get_stat(struct phy_device *phydev, int i) val = phy_read_paged(phydev, stat.page, stat.reg); if (val < 0) { - ret = UINT64_MAX; + ret = U64_MAX; } else { val = val & ((1 << stat.bits) - 1); priv->stats[i] += val; diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c index 403b085f0a89..15352f987bdf 100644 --- a/drivers/net/phy/mdio-bitbang.c +++ b/drivers/net/phy/mdio-bitbang.c @@ -205,14 +205,6 @@ static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val) return 0; } -static int mdiobb_reset(struct mii_bus *bus) -{ - struct mdiobb_ctrl *ctrl = bus->priv; - if (ctrl->reset) - ctrl->reset(bus); - return 0; -} - struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl) { struct mii_bus *bus; @@ -225,7 +217,6 @@ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl) bus->read = mdiobb_read; bus->write = mdiobb_write; - bus->reset = mdiobb_reset; bus->priv = ctrl; return bus; diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c index 1861f387820d..863496fa5d13 100644 --- a/drivers/net/phy/mdio-boardinfo.c +++ b/drivers/net/phy/mdio-boardinfo.c @@ -30,17 +30,20 @@ void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus, struct mdio_board_info *bi)) { struct mdio_board_entry *be; + struct mdio_board_entry *tmp; struct mdio_board_info *bi; int ret; mutex_lock(&mdio_board_lock); - list_for_each_entry(be, &mdio_board_list, list) { + list_for_each_entry_safe(be, tmp, &mdio_board_list, list) { bi = &be->board_info; if (strcmp(bus->id, bi->bus_id)) continue; + mutex_unlock(&mdio_board_lock); ret = cb(bus, bi); + mutex_lock(&mdio_board_lock); if (ret) 
continue; diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 4333c6e14742..b501221819e1 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -24,8 +24,10 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/platform_device.h> +#include <linux/mdio-bitbang.h> +#include <linux/mdio-gpio.h> #include <linux/gpio.h> -#include <linux/platform_data/mdio-gpio.h> +#include <linux/gpio/consumer.h> #include <linux/of_gpio.h> #include <linux/of_mdio.h> @@ -35,37 +37,22 @@ struct mdio_gpio_info { struct gpio_desc *mdc, *mdio, *mdo; }; -static void *mdio_gpio_of_get_data(struct platform_device *pdev) +static int mdio_gpio_get_data(struct device *dev, + struct mdio_gpio_info *bitbang) { - struct device_node *np = pdev->dev.of_node; - struct mdio_gpio_platform_data *pdata; - enum of_gpio_flags flags; - int ret; - - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return NULL; - - ret = of_get_gpio_flags(np, 0, &flags); - if (ret < 0) - return NULL; - - pdata->mdc = ret; - pdata->mdc_active_low = flags & OF_GPIO_ACTIVE_LOW; - - ret = of_get_gpio_flags(np, 1, &flags); - if (ret < 0) - return NULL; - pdata->mdio = ret; - pdata->mdio_active_low = flags & OF_GPIO_ACTIVE_LOW; - - ret = of_get_gpio_flags(np, 2, &flags); - if (ret > 0) { - pdata->mdo = ret; - pdata->mdo_active_low = flags & OF_GPIO_ACTIVE_LOW; - } - - return pdata; + bitbang->mdc = devm_gpiod_get_index(dev, NULL, MDIO_GPIO_MDC, + GPIOD_OUT_LOW); + if (IS_ERR(bitbang->mdc)) + return PTR_ERR(bitbang->mdc); + + bitbang->mdio = devm_gpiod_get_index(dev, NULL, MDIO_GPIO_MDIO, + GPIOD_IN); + if (IS_ERR(bitbang->mdio)) + return PTR_ERR(bitbang->mdio); + + bitbang->mdo = devm_gpiod_get_index_optional(dev, NULL, MDIO_GPIO_MDO, + GPIOD_OUT_LOW); + return PTR_ERR_OR_ZERO(bitbang->mdo); } static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) @@ -125,78 +112,28 @@ static const struct mdiobb_ops mdio_gpio_ops = { }; static struct mii_bus *mdio_gpio_bus_init(struct device *dev, - struct mdio_gpio_platform_data *pdata, + struct mdio_gpio_info *bitbang, int bus_id) { struct mii_bus *new_bus; - struct mdio_gpio_info *bitbang; - int i; - int mdc, mdio, mdo; - unsigned long mdc_flags = GPIOF_OUT_INIT_LOW; - unsigned long mdio_flags = GPIOF_DIR_IN; - unsigned long mdo_flags = GPIOF_OUT_INIT_HIGH; - - bitbang = devm_kzalloc(dev, sizeof(*bitbang), GFP_KERNEL); - if (!bitbang) - goto out; bitbang->ctrl.ops = &mdio_gpio_ops; - bitbang->ctrl.reset = pdata->reset; - mdc = pdata->mdc; - bitbang->mdc = gpio_to_desc(mdc); - if (pdata->mdc_active_low) - mdc_flags = GPIOF_OUT_INIT_HIGH | GPIOF_ACTIVE_LOW; - mdio = pdata->mdio; - bitbang->mdio = gpio_to_desc(mdio); - if (pdata->mdio_active_low) - mdio_flags |= GPIOF_ACTIVE_LOW; - mdo = pdata->mdo; - if (mdo) { - bitbang->mdo = gpio_to_desc(mdo); - if (pdata->mdo_active_low) - mdo_flags = GPIOF_OUT_INIT_LOW | GPIOF_ACTIVE_LOW; - } new_bus = alloc_mdio_bitbang(&bitbang->ctrl); if (!new_bus) - goto out; - - new_bus->name = "GPIO Bitbanged MDIO", + return NULL; - new_bus->phy_mask = pdata->phy_mask; - new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask; - memcpy(new_bus->irq, pdata->irqs, sizeof(new_bus->irq)); + new_bus->name = "GPIO Bitbanged MDIO"; new_bus->parent = dev; - if (new_bus->phy_mask == ~0) - goto out_free_bus; - - for (i = 0; i < PHY_MAX_ADDR; i++) - if (!new_bus->irq[i]) - new_bus->irq[i] = PHY_POLL; - if (bus_id != -1) snprintf(new_bus->id, MII_BUS_ID_SIZE, "gpio-%x", bus_id); else strncpy(new_bus->id, "gpio", 
MII_BUS_ID_SIZE); - if (devm_gpio_request_one(dev, mdc, mdc_flags, "mdc")) - goto out_free_bus; - - if (devm_gpio_request_one(dev, mdio, mdio_flags, "mdio")) - goto out_free_bus; - - if (mdo && devm_gpio_request_one(dev, mdo, mdo_flags, "mdo")) - goto out_free_bus; - dev_set_drvdata(dev, new_bus); return new_bus; - -out_free_bus: - free_mdio_bitbang(new_bus); -out: - return NULL; } static void mdio_gpio_bus_deinit(struct device *dev) @@ -216,26 +153,29 @@ static void mdio_gpio_bus_destroy(struct device *dev) static int mdio_gpio_probe(struct platform_device *pdev) { - struct mdio_gpio_platform_data *pdata; + struct mdio_gpio_info *bitbang; struct mii_bus *new_bus; int ret, bus_id; + bitbang = devm_kzalloc(&pdev->dev, sizeof(*bitbang), GFP_KERNEL); + if (!bitbang) + return -ENOMEM; + + ret = mdio_gpio_get_data(&pdev->dev, bitbang); + if (ret) + return ret; + if (pdev->dev.of_node) { - pdata = mdio_gpio_of_get_data(pdev); bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio"); if (bus_id < 0) { dev_warn(&pdev->dev, "failed to get alias id\n"); bus_id = 0; } } else { - pdata = dev_get_platdata(&pdev->dev); bus_id = pdev->id; } - if (!pdata) - return -ENODEV; - - new_bus = mdio_gpio_bus_init(&pdev->dev, pdata, bus_id); + new_bus = mdio_gpio_bus_init(&pdev->dev, bitbang, bus_id); if (!new_bus) return -ENODEV; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index f41b224a9cdb..de31c5170a5b 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -650,9 +650,6 @@ static void kszphy_get_strings(struct phy_device *phydev, u8 *data) } } -#ifndef UINT64_MAX -#define UINT64_MAX (u64)(~((u64)0)) -#endif static u64 kszphy_get_stat(struct phy_device *phydev, int i) { struct kszphy_hw_stat stat = kszphy_hw_stats[i]; @@ -662,7 +659,7 @@ static u64 kszphy_get_stat(struct phy_device *phydev, int i) val = phy_read(phydev, stat.reg); if (val < 0) { - ret = UINT64_MAX; + ret = U64_MAX; } else { val = val & ((1 << stat.bits) - 1); priv->stats[i] += val; diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index a97ac8c12c4c..2d67937866a3 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -21,6 +21,8 @@ #include <linux/phy.h> #include <linux/microchipphy.h> #include <linux/delay.h> +#include <linux/of.h> +#include <dt-bindings/net/microchip-lan78xx.h> #define DRIVER_AUTHOR "WOOJUNG HUH <woojung.huh@microchip.com>" #define DRIVER_DESC "Microchip LAN88XX PHY driver" @@ -225,6 +227,8 @@ static int lan88xx_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; struct lan88xx_priv *priv; + u32 led_modes[4]; + int len; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -232,6 +236,27 @@ static int lan88xx_probe(struct phy_device *phydev) priv->wolopts = 0; + len = of_property_read_variable_u32_array(dev->of_node, + "microchip,led-modes", + led_modes, + 0, + ARRAY_SIZE(led_modes)); + if (len >= 0) { + u32 reg = 0; + int i; + + for (i = 0; i < len; i++) { + if (led_modes[i] > 15) + return -EINVAL; + reg |= led_modes[i] << (i * 4); + } + for (; i < ARRAY_SIZE(led_modes); i++) + reg |= LAN78XX_FORCE_LED_OFF << (i * 4); + (void)phy_write(phydev, LAN78XX_PHY_LED_MODE_SELECT, reg); + } else if (len == -EOVERFLOW) { + return -EINVAL; + } + /* these values can be used to identify internal PHY */ priv->chip_id = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_ID); priv->chip_rev = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_REV); diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index be399d645224..c328208388da 
100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -168,9 +168,6 @@ static void smsc_get_strings(struct phy_device *phydev, u8 *data) } } -#ifndef UINT64_MAX -#define UINT64_MAX (u64)(~((u64)0)) -#endif static u64 smsc_get_stat(struct phy_device *phydev, int i) { struct smsc_hw_stat stat = smsc_hw_stats[i]; @@ -179,7 +176,7 @@ static u64 smsc_get_stat(struct phy_device *phydev, int i) val = phy_read(phydev, stat.reg); if (val < 0) - ret = UINT64_MAX; + ret = U64_MAX; else ret = val; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index ddb6bf85a59c..9dbd390ace34 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2942,7 +2942,7 @@ static int team_device_event(struct notifier_block *unused, case NETDEV_CHANGE: if (netif_running(port->dev)) team_port_change_check(port, - !!netif_carrier_ok(port->dev)); + !!netif_oper_up(port->dev)); break; case NETDEV_UNREGISTER: team_del_slave(port->team->dev, dev); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ef33950a45d9..d3c04ab9752a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -248,11 +248,11 @@ struct veth { __be16 h_vlan_TCI; }; -bool tun_is_xdp_buff(void *ptr) +bool tun_is_xdp_frame(void *ptr) { return (unsigned long)ptr & TUN_XDP_FLAG; } -EXPORT_SYMBOL(tun_is_xdp_buff); +EXPORT_SYMBOL(tun_is_xdp_frame); void *tun_xdp_to_ptr(void *ptr) { @@ -525,11 +525,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, rcu_read_lock(); - /* We may get a very small possibility of OOO during switching, not - * worth to optimize.*/ - if (tun->numqueues == 1 || tfile->detached) - goto unlock; - e = tun_flow_find(head, rxhash); if (likely(e)) { /* TODO: keep queueing to old queue until it's empty? */ @@ -548,7 +543,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, spin_unlock_bh(&tun->lock); } -unlock: rcu_read_unlock(); } @@ -660,10 +654,10 @@ void tun_ptr_free(void *ptr) { if (!ptr) return; - if (tun_is_xdp_buff(ptr)) { - struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + if (tun_is_xdp_frame(ptr)) { + struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); - put_page(virt_to_head_page(xdp->data)); + xdp_return_frame(xdpf); } else { __skb_array_destroy_skb(ptr); } @@ -854,6 +848,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file, tun->dev, tfile->queue_index); if (err < 0) goto out; + err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL); + if (err < 0) { + xdp_rxq_info_unreg(&tfile->xdp_rxq); + goto out; + } err = 0; } @@ -1290,21 +1290,13 @@ static const struct net_device_ops tun_netdev_ops = { .ndo_get_stats64 = tun_net_get_stats64, }; -static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame) { struct tun_struct *tun = netdev_priv(dev); - struct xdp_buff *buff = xdp->data_hard_start; - int headroom = xdp->data - xdp->data_hard_start; struct tun_file *tfile; u32 numqueues; int ret = 0; - /* Assure headroom is available and buff is properly aligned */ - if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp))) - return -ENOSPC; - - *buff = *xdp; - rcu_read_lock(); numqueues = READ_ONCE(tun->numqueues); @@ -1318,7 +1310,7 @@ static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) /* Encode the XDP flag into lowest bit for consumer to differ * XDP buffer from sk_buff. 
*/ - if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) { + if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) { this_cpu_inc(tun->pcpu_stats->tx_dropped); ret = -ENOSPC; } @@ -1328,6 +1320,16 @@ out: return ret; } +static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) +{ + struct xdp_frame *frame = convert_to_xdp_frame(xdp); + + if (unlikely(!frame)) + return -EOVERFLOW; + + return tun_xdp_xmit(dev, frame); +} + static void tun_xdp_flush(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -1675,7 +1677,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, case XDP_TX: get_page(alloc_frag->page); alloc_frag->offset += buflen; - if (tun_xdp_xmit(tun->dev, &xdp)) + if (tun_xdp_tx(tun->dev, &xdp)) goto err_redirect; tun_xdp_flush(tun->dev); rcu_read_unlock(); @@ -1683,6 +1685,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, return NULL; case XDP_PASS: delta = orig_data - xdp.data; + len = xdp.data_end - xdp.data; break; default: bpf_warn_invalid_xdp_action(act); @@ -1703,7 +1706,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, } skb_reserve(skb, pad - delta); - skb_put(skb, len + delta); + skb_put(skb, len); get_page(alloc_frag->page); alloc_frag->offset += buflen; @@ -1924,10 +1927,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, rcu_read_unlock(); } - rcu_read_lock(); - if (!rcu_dereference(tun->steering_prog)) + /* Compute the costly rx hash only if needed for flow updates. + * We may get a very small possibility of OOO during switching, not + * worth to optimize. + */ + if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 && + !tfile->detached) rxhash = __skb_get_hash_symmetric(skb); - rcu_read_unlock(); if (frags) { /* Exercise flow dissector code path. 
*/ @@ -1996,11 +2002,11 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) static ssize_t tun_put_user_xdp(struct tun_struct *tun, struct tun_file *tfile, - struct xdp_buff *xdp, + struct xdp_frame *xdp_frame, struct iov_iter *iter) { int vnet_hdr_sz = 0; - size_t size = xdp->data_end - xdp->data; + size_t size = xdp_frame->len; struct tun_pcpu_stats *stats; size_t ret; @@ -2016,7 +2022,7 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun, iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); } - ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz; + ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz; stats = get_cpu_ptr(tun->pcpu_stats); u64_stats_update_begin(&stats->syncp); @@ -2184,11 +2190,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, return err; } - if (tun_is_xdp_buff(ptr)) { - struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + if (tun_is_xdp_frame(ptr)) { + struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); - ret = tun_put_user_xdp(tun, tfile, xdp, to); - put_page(virt_to_head_page(xdp->data)); + ret = tun_put_user_xdp(tun, tfile, xdpf, to); + xdp_return_frame(xdpf); } else { struct sk_buff *skb = ptr; @@ -2427,10 +2433,10 @@ out_free: static int tun_ptr_peek_len(void *ptr) { if (likely(ptr)) { - if (tun_is_xdp_buff(ptr)) { - struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + if (tun_is_xdp_frame(ptr)) { + struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); - return xdp->data_end - xdp->data; + return xdpf->len; } return __skb_array_len_with_tag(ptr); } else { diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index f28bd74ac275..418b0904cecb 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -111,6 +111,7 @@ config USB_LAN78XX select MII select PHYLIB select MICROCHIP_PHY + select FIXED_PHY help This option adds support for Microchip LAN78XX based USB 2 & USB 3 10/100/1000 Ethernet adapters. 
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 0867f7275852..91761436709a 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -36,13 +36,14 @@ #include <linux/irq.h> #include <linux/irqchip/chained_irq.h> #include <linux/microchipphy.h> -#include <linux/phy.h> +#include <linux/phy_fixed.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> #include "lan78xx.h" #define DRIVER_AUTHOR "WOOJUNG HUH <woojung.huh@microchip.com>" #define DRIVER_DESC "LAN78XX USB 3.0 Gigabit Ethernet Devices" #define DRIVER_NAME "lan78xx" -#define DRIVER_VERSION "1.0.6" #define TX_TIMEOUT_JIFFIES (5 * HZ) #define THROTTLE_JIFFIES (HZ / 8) @@ -278,6 +279,30 @@ struct lan78xx_statstage64 { u64 eee_tx_lpi_time; }; +static u32 lan78xx_regs[] = { + ID_REV, + INT_STS, + HW_CFG, + PMT_CTL, + E2P_CMD, + E2P_DATA, + USB_STATUS, + VLAN_TYPE, + MAC_CR, + MAC_RX, + MAC_TX, + FLOW, + ERR_STS, + MII_ACC, + MII_DATA, + EEE_TX_LPI_REQ_DLY, + EEE_TW_TX_SYS, + EEE_TX_LPI_REM_DLY, + WUCSR +}; + +#define PHY_REG_SIZE (32 * sizeof(u32)) + struct lan78xx_net; struct lan78xx_priv { @@ -1477,7 +1502,6 @@ static void lan78xx_get_drvinfo(struct net_device *net, struct lan78xx_net *dev = netdev_priv(net); strncpy(info->driver, DRIVER_NAME, sizeof(info->driver)); - strncpy(info->version, DRIVER_VERSION, sizeof(info->version)); usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info)); } @@ -1605,6 +1629,34 @@ exit: return ret; } +static int lan78xx_get_regs_len(struct net_device *netdev) +{ + if (!netdev->phydev) + return (sizeof(lan78xx_regs)); + else + return (sizeof(lan78xx_regs) + PHY_REG_SIZE); +} + +static void +lan78xx_get_regs(struct net_device *netdev, struct ethtool_regs *regs, + void *buf) +{ + u32 *data = buf; + int i, j; + struct lan78xx_net *dev = netdev_priv(netdev); + + /* Read Device/MAC registers */ + for (i = 0; i < (sizeof(lan78xx_regs) / sizeof(u32)); i++) + lan78xx_read_reg(dev, lan78xx_regs[i], &data[i]); + + if (!netdev->phydev) + return; + + /* Read PHY registers */ + for (j = 0; j < 32; i++, j++) + data[i] = phy_read(netdev->phydev, j); +} + static const struct ethtool_ops lan78xx_ethtool_ops = { .get_link = lan78xx_get_link, .nway_reset = phy_ethtool_nway_reset, @@ -1625,6 +1677,8 @@ static const struct ethtool_ops lan78xx_ethtool_ops = { .set_pauseparam = lan78xx_set_pause, .get_link_ksettings = lan78xx_get_link_ksettings, .set_link_ksettings = lan78xx_set_link_ksettings, + .get_regs_len = lan78xx_get_regs_len, + .get_regs = lan78xx_get_regs, }; static int lan78xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) @@ -1652,34 +1706,31 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev) addr[5] = (addr_hi >> 8) & 0xFF; if (!is_valid_ether_addr(addr)) { - /* reading mac address from EEPROM or OTP */ - if ((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - addr) == 0) || - (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - addr) == 0)) { - if (is_valid_ether_addr(addr)) { - /* eeprom values are valid so use them */ - netif_dbg(dev, ifup, dev->net, - "MAC address read from EEPROM"); - } else { - /* generate random MAC */ - random_ether_addr(addr); - netif_dbg(dev, ifup, dev->net, - "MAC address set to random addr"); - } - - addr_lo = addr[0] | (addr[1] << 8) | - (addr[2] << 16) | (addr[3] << 24); - addr_hi = addr[4] | (addr[5] << 8); - - ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); - ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + if (!eth_platform_get_mac_address(&dev->udev->dev, addr)) { + /* valid address present in 
Device Tree */ + netif_dbg(dev, ifup, dev->net, + "MAC address read from Device Tree"); + } else if (((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, + ETH_ALEN, addr) == 0) || + (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, + ETH_ALEN, addr) == 0)) && + is_valid_ether_addr(addr)) { + /* eeprom values are valid so use them */ + netif_dbg(dev, ifup, dev->net, + "MAC address read from EEPROM"); } else { /* generate random MAC */ random_ether_addr(addr); netif_dbg(dev, ifup, dev->net, "MAC address set to random addr"); } + + addr_lo = addr[0] | (addr[1] << 8) | + (addr[2] << 16) | (addr[3] << 24); + addr_hi = addr[4] | (addr[5] << 8); + + ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); + ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); } ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); @@ -1762,6 +1813,7 @@ done: static int lan78xx_mdio_init(struct lan78xx_net *dev) { + struct device_node *node; int ret; dev->mdiobus = mdiobus_alloc(); @@ -1790,7 +1842,13 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev) break; } - ret = mdiobus_register(dev->mdiobus); + node = of_get_child_by_name(dev->udev->dev.of_node, "mdio"); + if (node) { + ret = of_mdiobus_register(dev->mdiobus, node); + of_node_put(node); + } else { + ret = mdiobus_register(dev->mdiobus); + } if (ret) { netdev_err(dev->net, "can't register MDIO bus\n"); goto exit1; @@ -2003,52 +2061,91 @@ static int ksz9031rnx_fixup(struct phy_device *phydev) return 1; } -static int lan78xx_phy_init(struct lan78xx_net *dev) +static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev) { + u32 buf; int ret; - u32 mii_adv; + struct fixed_phy_status fphy_status = { + .link = 1, + .speed = SPEED_1000, + .duplex = DUPLEX_FULL, + }; struct phy_device *phydev; phydev = phy_find_first(dev->mdiobus); if (!phydev) { - netdev_err(dev->net, "no PHY found\n"); - return -EIO; - } - - if ((dev->chipid == ID_REV_CHIP_ID_7800_) || - (dev->chipid == ID_REV_CHIP_ID_7850_)) { - phydev->is_internal = true; - dev->interface = PHY_INTERFACE_MODE_GMII; - - } else if (dev->chipid == ID_REV_CHIP_ID_7801_) { + netdev_dbg(dev->net, "PHY Not Found!! 
Registering Fixed PHY\n"); + phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1, + NULL); + if (IS_ERR(phydev)) { + netdev_err(dev->net, "No PHY/fixed_PHY found\n"); + return NULL; + } + netdev_dbg(dev->net, "Registered FIXED PHY\n"); + dev->interface = PHY_INTERFACE_MODE_RGMII; + ret = lan78xx_write_reg(dev, MAC_RGMII_ID, + MAC_RGMII_ID_TXC_DELAY_EN_); + ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00); + ret = lan78xx_read_reg(dev, HW_CFG, &buf); + buf |= HW_CFG_CLK125_EN_; + buf |= HW_CFG_REFCLK25_EN_; + ret = lan78xx_write_reg(dev, HW_CFG, buf); + } else { if (!phydev->drv) { netdev_err(dev->net, "no PHY driver found\n"); - return -EIO; + return NULL; } - dev->interface = PHY_INTERFACE_MODE_RGMII; - /* external PHY fixup for KSZ9031RNX */ ret = phy_register_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0, ksz9031rnx_fixup); if (ret < 0) { - netdev_err(dev->net, "fail to register fixup\n"); - return ret; + netdev_err(dev->net, "Failed to register fixup for PHY_KSZ9031RNX\n"); + return NULL; } /* external PHY fixup for LAN8835 */ ret = phy_register_fixup_for_uid(PHY_LAN8835, 0xfffffff0, lan8835_fixup); if (ret < 0) { - netdev_err(dev->net, "fail to register fixup\n"); - return ret; + netdev_err(dev->net, "Failed to register fixup for PHY_LAN8835\n"); + return NULL; } /* add more external PHY fixup here if needed */ phydev->is_internal = false; - } else { - netdev_err(dev->net, "unknown ID found\n"); - ret = -EIO; - goto error; + } + return phydev; +} + +static int lan78xx_phy_init(struct lan78xx_net *dev) +{ + int ret; + u32 mii_adv; + struct phy_device *phydev; + + switch (dev->chipid) { + case ID_REV_CHIP_ID_7801_: + phydev = lan7801_phy_init(dev); + if (!phydev) { + netdev_err(dev->net, "lan7801: PHY Init Failed"); + return -EIO; + } + break; + + case ID_REV_CHIP_ID_7800_: + case ID_REV_CHIP_ID_7850_: + phydev = phy_find_first(dev->mdiobus); + if (!phydev) { + netdev_err(dev->net, "no PHY found\n"); + return -EIO; + } + phydev->is_internal = true; + dev->interface = PHY_INTERFACE_MODE_GMII; + break; + + default: + netdev_err(dev->net, "Unknown CHIP ID found\n"); + return -EIO; } /* if phyirq is not set, use polling mode in phylib */ @@ -2067,6 +2164,16 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) if (ret) { netdev_err(dev->net, "can't attach PHY to %s\n", dev->mdiobus->id); + if (dev->chipid == ID_REV_CHIP_ID_7801_) { + if (phy_is_pseudo_fixed_link(phydev)) { + fixed_phy_unregister(phydev); + } else { + phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, + 0xfffffff0); + phy_unregister_fixup_for_uid(PHY_LAN8835, + 0xfffffff0); + } + } return -EIO; } @@ -2079,17 +2186,33 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) mii_adv = (u32)mii_advertise_flowctrl(dev->fc_request_control); phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv); + if (phydev->mdio.dev.of_node) { + u32 reg; + int len; + + len = of_property_count_elems_of_size(phydev->mdio.dev.of_node, + "microchip,led-modes", + sizeof(u32)); + if (len >= 0) { + /* Ensure the appropriate LEDs are enabled */ + lan78xx_read_reg(dev, HW_CFG, ®); + reg &= ~(HW_CFG_LED0_EN_ | + HW_CFG_LED1_EN_ | + HW_CFG_LED2_EN_ | + HW_CFG_LED3_EN_); + reg |= (len > 0) * HW_CFG_LED0_EN_ | + (len > 1) * HW_CFG_LED1_EN_ | + (len > 2) * HW_CFG_LED2_EN_ | + (len > 3) * HW_CFG_LED3_EN_; + lan78xx_write_reg(dev, HW_CFG, reg); + } + } + genphy_config_aneg(phydev); dev->fc_autoneg = phydev->autoneg; return 0; - -error: - phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0); - phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0); - - 
return ret; } static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size) @@ -3487,6 +3610,7 @@ static void lan78xx_disconnect(struct usb_interface *intf) struct lan78xx_net *dev; struct usb_device *udev; struct net_device *net; + struct phy_device *phydev; dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); @@ -3495,12 +3619,16 @@ static void lan78xx_disconnect(struct usb_interface *intf) udev = interface_to_usbdev(intf); net = dev->net; + phydev = net->phydev; phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0); phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0); phy_disconnect(net->phydev); + if (phy_is_pseudo_fixed_link(phydev)) + fixed_phy_unregister(phydev); + unregister_netdev(net); cancel_delayed_work_sync(&dev->wq); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 770422e953f7..f34794a76c4d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -419,46 +419,51 @@ static void virtnet_xdp_flush(struct net_device *dev) virtqueue_kick(sq->vq); } -static bool __virtnet_xdp_xmit(struct virtnet_info *vi, - struct xdp_buff *xdp) +static int __virtnet_xdp_xmit(struct virtnet_info *vi, + struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; - unsigned int len; + struct xdp_frame *xdpf_sent; struct send_queue *sq; + unsigned int len; unsigned int qp; - void *xdp_sent; int err; qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); sq = &vi->sq[qp]; /* Free up any pending old buffers before queueing new ones. */ - while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) { - struct page *sent_page = virt_to_head_page(xdp_sent); + while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) + xdp_return_frame(xdpf_sent); - put_page(sent_page); - } + /* virtqueue want to use data area in-front of packet */ + if (unlikely(xdpf->metasize > 0)) + return -EOPNOTSUPP; - xdp->data -= vi->hdr_len; + if (unlikely(xdpf->headroom < vi->hdr_len)) + return -EOVERFLOW; + + /* Make room for virtqueue hdr (also change xdpf->headroom?) */ + xdpf->data -= vi->hdr_len; /* Zero header and leave csum up to XDP layers */ - hdr = xdp->data; + hdr = xdpf->data; memset(hdr, 0, vi->hdr_len); + xdpf->len += vi->hdr_len; - sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data); + sg_init_one(sq->sg, xdpf->data, xdpf->len); - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC); + err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC); if (unlikely(err)) - return false; /* Caller handle free/refcnt */ + return -ENOSPC; /* Caller handle free/refcnt */ - return true; + return 0; } -static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq; struct bpf_prog *xdp_prog; - bool sent; /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. 
@@ -467,10 +472,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) if (!xdp_prog) return -ENXIO; - sent = __virtnet_xdp_xmit(vi, xdp); - if (!sent) - return -ENOSPC; - return 0; + return __virtnet_xdp_xmit(vi, xdpf); } static unsigned int virtnet_get_headroom(struct virtnet_info *vi) @@ -559,7 +561,6 @@ static struct sk_buff *receive_small(struct net_device *dev, struct page *page = virt_to_head_page(buf); unsigned int delta = 0; struct page *xdp_page; - bool sent; int err; len -= vi->hdr_len; @@ -568,6 +569,7 @@ static struct sk_buff *receive_small(struct net_device *dev, xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; + struct xdp_frame *xdpf; struct xdp_buff xdp; void *orig_data; u32 act; @@ -608,10 +610,14 @@ static struct sk_buff *receive_small(struct net_device *dev, case XDP_PASS: /* Recalculate length in case bpf program changed it */ delta = orig_data - xdp.data; + len = xdp.data_end - xdp.data; break; case XDP_TX: - sent = __virtnet_xdp_xmit(vi, &xdp); - if (unlikely(!sent)) { + xdpf = convert_to_xdp_frame(&xdp); + if (unlikely(!xdpf)) + goto err_xdp; + err = __virtnet_xdp_xmit(vi, xdpf); + if (unlikely(err)) { trace_xdp_exception(vi->dev, xdp_prog, act); goto err_xdp; } @@ -641,7 +647,7 @@ static struct sk_buff *receive_small(struct net_device *dev, goto err; } skb_reserve(skb, headroom - delta); - skb_put(skb, len + delta); + skb_put(skb, len); if (!delta) { buf += header_offset; memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); @@ -694,7 +700,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); - bool sent; int err; head_skb = NULL; @@ -702,6 +707,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { + struct xdp_frame *xdpf; struct page *xdp_page; struct xdp_buff xdp; void *data; @@ -756,6 +762,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, offset = xdp.data - page_address(xdp_page) - vi->hdr_len; + /* recalculate len if xdp.data or xdp.data_end were + * adjusted + */ + len = xdp.data_end - xdp.data + vi->hdr_len; /* We can only create skb based on xdp_page. 
*/ if (unlikely(xdp_page != page)) { rcu_read_unlock(); @@ -766,8 +776,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - sent = __virtnet_xdp_xmit(vi, &xdp); - if (unlikely(!sent)) { + xdpf = convert_to_xdp_frame(&xdp); + if (unlikely(!xdpf)) + goto err_xdp; + err = __virtnet_xdp_xmit(vi, xdpf); + if (unlikely(err)) { trace_xdp_exception(vi->dev, xdp_prog, act); if (unlikely(xdp_page != page)) put_page(xdp_page); @@ -1312,6 +1325,13 @@ static int virtnet_open(struct net_device *dev) if (err < 0) return err; + err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL); + if (err < 0) { + xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); + return err; + } + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi); } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 0a2b180d138a..90b5f3900c22 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -48,6 +48,9 @@ static unsigned int vrf_net_id; struct net_vrf { struct rtable __rcu *rth; struct rt6_info __rcu *rt6; +#if IS_ENABLED(CONFIG_IPV6) + struct fib6_table *fib6_table; +#endif u32 tb_id; }; @@ -496,7 +499,6 @@ static int vrf_rt6_create(struct net_device *dev) int flags = DST_HOST | DST_NOPOLICY | DST_NOXFRM; struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); - struct fib6_table *rt6i_table; struct rt6_info *rt6; int rc = -ENOMEM; @@ -504,8 +506,8 @@ static int vrf_rt6_create(struct net_device *dev) if (!ipv6_mod_enabled()) return 0; - rt6i_table = fib6_new_table(net, vrf->tb_id); - if (!rt6i_table) + vrf->fib6_table = fib6_new_table(net, vrf->tb_id); + if (!vrf->fib6_table) goto out; /* create a dst for routing packets out a VRF device */ @@ -513,7 +515,6 @@ static int vrf_rt6_create(struct net_device *dev) if (!rt6) goto out; - rt6->rt6i_table = rt6i_table; rt6->dst.output = vrf_output6; rcu_assign_pointer(vrf->rt6, rt6); @@ -946,22 +947,8 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, int flags) { struct net_vrf *vrf = netdev_priv(dev); - struct fib6_table *table = NULL; - struct rt6_info *rt6; - - rcu_read_lock(); - - /* fib6_table does not have a refcnt and can not be freed */ - rt6 = rcu_dereference(vrf->rt6); - if (likely(rt6)) - table = rt6->rt6i_table; - - rcu_read_unlock(); - - if (!table) - return NULL; - return ip6_pol_route(net, table, ifindex, fl6, skb, flags); + return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags); } static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index fab7a4db249e..aee0e60471f1 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2085,9 +2085,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, local_ip = vxlan->cfg.saddr; dst_cache = &rdst->dst_cache; md->gbp = skb->mark; - ttl = vxlan->cfg.ttl; - if (!ttl && vxlan_addr_multicast(dst)) - ttl = 1; + if (flags & VXLAN_F_TTL_INHERIT) { + ttl = ip_tunnel_get_ttl(old_iph, skb); + } else { + ttl = vxlan->cfg.ttl; + if (!ttl && vxlan_addr_multicast(dst)) + ttl = 1; + } tos = vxlan->cfg.tos; if (tos == 1) @@ -2709,6 +2713,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, }, [IFLA_VXLAN_GPE] = { .type = NLA_FLAG, }, [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG }, + [IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -3254,6 +3259,12 
@@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], if (data[IFLA_VXLAN_TTL]) conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]); + if (data[IFLA_VXLAN_TTL_INHERIT]) { + if (changelink) + return -EOPNOTSUPP; + conf->flags |= VXLAN_F_TTL_INHERIT; + } + if (data[IFLA_VXLAN_LABEL]) conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) & IPV6_FLOWLABEL_MASK; diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c index b3285175f20f..78ccf936d356 100644 --- a/drivers/ptp/ptp_pch.c +++ b/drivers/ptp/ptp_pch.c @@ -452,7 +452,6 @@ static int ptp_pch_adjtime(struct ptp_clock_info *ptp, s64 delta) static int ptp_pch_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { u64 ns; - u32 remainder; unsigned long flags; struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps); struct pch_ts_regs __iomem *regs = pch_dev->regs; @@ -461,8 +460,7 @@ static int ptp_pch_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) ns = pch_systime_read(regs); spin_unlock_irqrestore(&pch_dev->register_lock, flags); - ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); - ts->tv_nsec = remainder; + *ts = ns_to_timespec64(ns); return 0; } @@ -474,8 +472,7 @@ static int ptp_pch_settime(struct ptp_clock_info *ptp, struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps); struct pch_ts_regs __iomem *regs = pch_dev->regs; - ns = ts->tv_sec * 1000000000ULL; - ns += ts->tv_nsec; + ns = timespec64_to_ns(ts); spin_lock_irqsave(&pch_dev->register_lock, flags); pch_systime_write(regs, ns); diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 0ee8f33efb54..2d9fe7e4ee40 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1928,6 +1928,8 @@ lcs_portno_store (struct device *dev, struct device_attribute *attr, const char return -EINVAL; /* TODO: sanity checks */ card->portno = value; + if (card->dev) + card->dev->dev_port = card->portno; return count; @@ -2158,6 +2160,7 @@ lcs_new_device(struct ccwgroup_device *ccwgdev) card->dev = dev; card->dev->ml_priv = card; card->dev->netdev_ops = &lcs_netdev_ops; + card->dev->dev_port = card->portno; memcpy(card->dev->dev_addr, card->mac, LCS_MAC_LENGTH); #ifdef CONFIG_IP_MULTICAST if (!lcs_check_multicast_support(card)) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 78b98b3e7efa..2a5fec55bf60 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -148,6 +148,7 @@ struct qeth_perf_stats { unsigned int tx_csum; unsigned int tx_lin; unsigned int tx_linfail; + unsigned int rx_csum; }; /* Routing stuff */ @@ -712,9 +713,6 @@ enum qeth_discipline_id { struct qeth_discipline { const struct device_type *devtype; - void (*start_poll)(struct ccw_device *, int, unsigned long); - qdio_handler_t *input_handler; - qdio_handler_t *output_handler; int (*process_rx_buffer)(struct qeth_card *card, int budget, int *done); int (*recover)(void *ptr); int (*setup) (struct ccwgroup_device *); @@ -780,9 +778,9 @@ struct qeth_card { struct qeth_card_options options; wait_queue_head_t wait_q; - spinlock_t vlanlock; spinlock_t mclock; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + struct mutex vid_list_mutex; /* vid_list */ struct list_head vid_list; DECLARE_HASHTABLE(mac_htable, 4); DECLARE_HASHTABLE(ip_htable, 4); @@ -867,6 +865,32 @@ static inline int qeth_get_ip_version(struct sk_buff *skb) } } +static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb, + u8 flags) +{ + if ((card->dev->features & NETIF_F_RXCSUM) && + (flags & QETH_HDR_EXT_CSUM_TRANSP_REQ)) { +
skb->ip_summed = CHECKSUM_UNNECESSARY; + if (card->options.performance_stats) + card->perf_stats.rx_csum++; + } else { + skb->ip_summed = CHECKSUM_NONE; + } +} + +static inline void qeth_tx_csum(struct sk_buff *skb, u8 *flags, int ipv) +{ + *flags |= QETH_HDR_EXT_CSUM_TRANSP_REQ; + if ((ipv == 4 && ip_hdr(skb)->protocol == IPPROTO_UDP) || + (ipv == 6 && ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)) + *flags |= QETH_HDR_EXT_UDP; + if (ipv == 4) { + /* some HW requires combined L3+L4 csum offload: */ + *flags |= QETH_HDR_EXT_CSUM_HDR_REQ; + ip_hdr(skb)->check = 0; + } +} + static inline void qeth_put_buffer_pool_entry(struct qeth_card *card, struct qeth_buffer_pool_entry *entry) { @@ -879,6 +903,27 @@ static inline int qeth_is_diagass_supported(struct qeth_card *card, return card->info.diagass_support & (__u32)cmd; } +int qeth_send_simple_setassparms_prot(struct qeth_card *card, + enum qeth_ipa_funcs ipa_func, + u16 cmd_code, long data, + enum qeth_prot_versions prot); +/* IPv4 variant */ +static inline int qeth_send_simple_setassparms(struct qeth_card *card, + enum qeth_ipa_funcs ipa_func, + u16 cmd_code, long data) +{ + return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code, + data, QETH_PROT_IPV4); +} + +static inline int qeth_send_simple_setassparms_v6(struct qeth_card *card, + enum qeth_ipa_funcs ipa_func, + u16 cmd_code, long data) +{ + return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code, + data, QETH_PROT_IPV6); +} + extern struct qeth_discipline qeth_l2_discipline; extern struct qeth_discipline qeth_l3_discipline; extern const struct attribute_group *qeth_generic_attr_groups[]; @@ -921,13 +966,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *, struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *, struct qeth_hdr **); void qeth_schedule_recovery(struct qeth_card *); -void qeth_qdio_start_poll(struct ccw_device *, int, unsigned long); int qeth_poll(struct napi_struct *napi, int budget); -void qeth_qdio_input_handler(struct ccw_device *, - unsigned int, unsigned int, int, - int, unsigned long); -void qeth_qdio_output_handler(struct ccw_device *, unsigned int, - int, int, int, unsigned long); void qeth_clear_ipacmd_list(struct qeth_card *); int qeth_qdio_clear_card(struct qeth_card *, int); void qeth_clear_working_pool_list(struct qeth_card *); @@ -979,8 +1018,6 @@ int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action); int qeth_query_ipassists(struct qeth_card *, enum qeth_prot_versions prot); void qeth_trace_features(struct qeth_card *); void qeth_close_dev(struct qeth_card *); -int qeth_send_simple_setassparms(struct qeth_card *, enum qeth_ipa_funcs, - __u16, long); int qeth_send_setassparms(struct qeth_card *, struct qeth_cmd_buffer *, __u16, long, int (*reply_cb)(struct qeth_card *, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index dffd820731f2..06415b6a8f68 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1467,13 +1467,13 @@ static int qeth_setup_card(struct qeth_card *card) card->lan_online = 0; card->read_or_write_problem = 0; card->dev = NULL; - spin_lock_init(&card->vlanlock); spin_lock_init(&card->mclock); spin_lock_init(&card->lock); spin_lock_init(&card->ip_lock); spin_lock_init(&card->thread_mask_lock); mutex_init(&card->conf_mutex); mutex_init(&card->discipline_mutex); + mutex_init(&card->vid_list_mutex); card->thread_start_mask = 0; card->thread_allowed_mask = 0; card->thread_running_mask = 0; @@ -3588,15 +3588,14 @@ static void 
qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) } } -void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue, - unsigned long card_ptr) +static void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue, + unsigned long card_ptr) { struct qeth_card *card = (struct qeth_card *)card_ptr; if (card->dev && (card->dev->flags & IFF_UP)) napi_schedule(&card->napi); } -EXPORT_SYMBOL_GPL(qeth_qdio_start_poll); int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq) { @@ -3698,9 +3697,10 @@ out: return; } -void qeth_qdio_input_handler(struct ccw_device *ccwdev, unsigned int qdio_err, - unsigned int queue, int first_elem, int count, - unsigned long card_ptr) +static void qeth_qdio_input_handler(struct ccw_device *ccwdev, + unsigned int qdio_err, int queue, + int first_elem, int count, + unsigned long card_ptr) { struct qeth_card *card = (struct qeth_card *)card_ptr; @@ -3711,14 +3711,12 @@ void qeth_qdio_input_handler(struct ccw_device *ccwdev, unsigned int qdio_err, qeth_qdio_cq_handler(card, qdio_err, queue, first_elem, count); else if (qdio_err) qeth_schedule_recovery(card); - - } -EXPORT_SYMBOL_GPL(qeth_qdio_input_handler); -void qeth_qdio_output_handler(struct ccw_device *ccwdev, - unsigned int qdio_error, int __queue, int first_element, - int count, unsigned long card_ptr) +static void qeth_qdio_output_handler(struct ccw_device *ccwdev, + unsigned int qdio_error, int __queue, + int first_element, int count, + unsigned long card_ptr) { struct qeth_card *card = (struct qeth_card *) card_ptr; struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue]; @@ -3787,7 +3785,6 @@ void qeth_qdio_output_handler(struct ccw_device *ccwdev, card->perf_stats.outbound_handler_time += qeth_get_micros() - card->perf_stats.outbound_handler_start_time; } -EXPORT_SYMBOL_GPL(qeth_qdio_output_handler); /* We cannot use outbound queue 3 for unicast packets on HiperSockets */ static inline int qeth_cut_iqd_prio(struct qeth_card *card, int queue_num) @@ -4995,7 +4992,7 @@ static int qeth_qdio_establish(struct qeth_card *card) goto out_free_in_sbals; } for (i = 0; i < card->qdio.no_in_queues; ++i) - queue_start_poll[i] = card->discipline->start_poll; + queue_start_poll[i] = qeth_qdio_start_poll; qeth_qdio_establish_cq(card, in_sbal_ptrs, queue_start_poll); @@ -5019,8 +5016,8 @@ static int qeth_qdio_establish(struct qeth_card *card) init_data.qib_param_field = qib_param_field; init_data.no_input_qs = card->qdio.no_in_queues; init_data.no_output_qs = card->qdio.no_out_queues; - init_data.input_handler = card->discipline->input_handler; - init_data.output_handler = card->discipline->output_handler; + init_data.input_handler = qeth_qdio_input_handler; + init_data.output_handler = qeth_qdio_output_handler; init_data.queue_start_poll_array = queue_start_poll; init_data.int_parm = (unsigned long) card; init_data.input_sbal_addr_array = (void **) in_sbal_ptrs; @@ -5204,6 +5201,11 @@ retriable: rc = qeth_query_ipassists(card, QETH_PROT_IPV4); if (rc == -ENOMEM) goto out; + if (qeth_is_supported(card, IPA_IPV6)) { + rc = qeth_query_ipassists(card, QETH_PROT_IPV6); + if (rc == -ENOMEM) + goto out; + } if (qeth_is_supported(card, IPA_SETADAPTERPARMS)) { rc = qeth_query_setadapterparms(card); if (rc < 0) { @@ -5511,26 +5513,26 @@ int qeth_send_setassparms(struct qeth_card *card, } EXPORT_SYMBOL_GPL(qeth_send_setassparms); -int qeth_send_simple_setassparms(struct qeth_card *card, - enum qeth_ipa_funcs ipa_func, - __u16 cmd_code, long data) +int qeth_send_simple_setassparms_prot(struct qeth_card *card, + enum 
qeth_ipa_funcs ipa_func, + u16 cmd_code, long data, + enum qeth_prot_versions prot) { int rc; int length = 0; struct qeth_cmd_buffer *iob; - QETH_CARD_TEXT(card, 4, "simassp4"); + QETH_CARD_TEXT_(card, 4, "simassp%i", prot); if (data) length = sizeof(__u32); - iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, - length, QETH_PROT_IPV4); + iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, length, prot); if (!iob) return -ENOMEM; rc = qeth_send_setassparms(card, iob, length, data, qeth_setassparms_cb, NULL); return rc; } -EXPORT_SYMBOL_GPL(qeth_send_simple_setassparms); +EXPORT_SYMBOL_GPL(qeth_send_simple_setassparms_prot); static void qeth_unregister_dbf_views(void) { @@ -6008,7 +6010,8 @@ static struct { {"tx lin"}, {"tx linfail"}, {"cq handler count"}, - {"cq handler time"} + {"cq handler time"}, + {"rx csum"} }; int qeth_core_get_sset_count(struct net_device *dev, int stringset) @@ -6070,6 +6073,7 @@ void qeth_core_get_ethtool_stats(struct net_device *dev, data[35] = card->perf_stats.tx_linfail; data[36] = card->perf_stats.cq_cnt; data[37] = card->perf_stats.cq_time; + data[38] = card->perf_stats.rx_csum; } EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats); @@ -6326,14 +6330,15 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card, static int qeth_ipa_checksum_run_cmd(struct qeth_card *card, enum qeth_ipa_funcs ipa_func, __u16 cmd_code, long data, - struct qeth_checksum_cmd *chksum_cb) + struct qeth_checksum_cmd *chksum_cb, + enum qeth_prot_versions prot) { struct qeth_cmd_buffer *iob; int rc = -ENOMEM; QETH_CARD_TEXT(card, 4, "chkdocmd"); iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, - sizeof(__u32), QETH_PROT_IPV4); + sizeof(__u32), prot); if (iob) rc = qeth_send_setassparms(card, iob, sizeof(__u32), data, qeth_ipa_checksum_run_cmd_cb, @@ -6341,16 +6346,17 @@ static int qeth_ipa_checksum_run_cmd(struct qeth_card *card, return rc; } -static int qeth_send_checksum_on(struct qeth_card *card, int cstype) +static int qeth_send_checksum_on(struct qeth_card *card, int cstype, + enum qeth_prot_versions prot) { - const __u32 required_features = QETH_IPA_CHECKSUM_IP_HDR | - QETH_IPA_CHECKSUM_UDP | - QETH_IPA_CHECKSUM_TCP; + u32 required_features = QETH_IPA_CHECKSUM_UDP | QETH_IPA_CHECKSUM_TCP; struct qeth_checksum_cmd chksum_cb; int rc; + if (prot == QETH_PROT_IPV4) + required_features |= QETH_IPA_CHECKSUM_IP_HDR; rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_START, 0, - &chksum_cb); + &chksum_cb, prot); if (!rc) { if ((required_features & chksum_cb.supported) != required_features) @@ -6362,37 +6368,42 @@ static int qeth_send_checksum_on(struct qeth_card *card, int cstype) QETH_CARD_IFNAME(card)); } if (rc) { - qeth_send_simple_setassparms(card, cstype, IPA_CMD_ASS_STOP, 0); + qeth_send_simple_setassparms_prot(card, cstype, + IPA_CMD_ASS_STOP, 0, prot); dev_warn(&card->gdev->dev, - "Starting HW checksumming for %s failed, using SW checksumming\n", - QETH_CARD_IFNAME(card)); + "Starting HW IPv%d checksumming for %s failed, using SW checksumming\n", + prot, QETH_CARD_IFNAME(card)); return rc; } rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_ENABLE, - chksum_cb.supported, &chksum_cb); + chksum_cb.supported, &chksum_cb, + prot); if (!rc) { if ((required_features & chksum_cb.enabled) != required_features) rc = -EIO; } if (rc) { - qeth_send_simple_setassparms(card, cstype, IPA_CMD_ASS_STOP, 0); + qeth_send_simple_setassparms_prot(card, cstype, + IPA_CMD_ASS_STOP, 0, prot); dev_warn(&card->gdev->dev, - "Enabling HW checksumming for %s failed, using 
SW checksumming\n", - QETH_CARD_IFNAME(card)); + "Enabling HW IPv%d checksumming for %s failed, using SW checksumming\n", + prot, QETH_CARD_IFNAME(card)); return rc; } - dev_info(&card->gdev->dev, "HW Checksumming (%sbound) enabled\n", - cstype == IPA_INBOUND_CHECKSUM ? "in" : "out"); + dev_info(&card->gdev->dev, "HW Checksumming (%sbound IPv%d) enabled\n", + cstype == IPA_INBOUND_CHECKSUM ? "in" : "out", prot); return 0; } -static int qeth_set_ipa_csum(struct qeth_card *card, int on, int cstype) +static int qeth_set_ipa_csum(struct qeth_card *card, bool on, int cstype, + enum qeth_prot_versions prot) { - int rc = (on) ? qeth_send_checksum_on(card, cstype) - : qeth_send_simple_setassparms(card, cstype, - IPA_CMD_ASS_STOP, 0); + int rc = (on) ? qeth_send_checksum_on(card, cstype, prot) + : qeth_send_simple_setassparms_prot(card, cstype, + IPA_CMD_ASS_STOP, 0, + prot); return rc ? -EIO : 0; } @@ -6419,8 +6430,31 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on) return rc; } -#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO) +static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on) +{ + int rc_ipv4 = (on) ? -EOPNOTSUPP : 0; + int rc_ipv6; + + if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM)) + rc_ipv4 = qeth_set_ipa_csum(card, on, IPA_INBOUND_CHECKSUM, + QETH_PROT_IPV4); + if (!qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6)) + /* no/one Offload Assist available, so the rc is trivial */ + return rc_ipv4; + rc_ipv6 = qeth_set_ipa_csum(card, on, IPA_INBOUND_CHECKSUM, + QETH_PROT_IPV6); + + if (on) + /* enable: success if any Assist is active */ + return (rc_ipv6) ? rc_ipv4 : 0; + + /* disable: failure if any Assist is still active */ + return (rc_ipv6) ? rc_ipv6 : rc_ipv4; +} + +#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \ + NETIF_F_IPV6_CSUM) /** * qeth_recover_features() - Restore device features after recovery * @dev: the recovering net_device @@ -6455,16 +6489,19 @@ int qeth_set_features(struct net_device *dev, netdev_features_t features) QETH_DBF_HEX(SETUP, 2, &features, sizeof(features)); if ((changed & NETIF_F_IP_CSUM)) { - rc = qeth_set_ipa_csum(card, - features & NETIF_F_IP_CSUM ? 1 : 0, - IPA_OUTBOUND_CHECKSUM); + rc = qeth_set_ipa_csum(card, features & NETIF_F_IP_CSUM, + IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV4); if (rc) changed ^= NETIF_F_IP_CSUM; } - if ((changed & NETIF_F_RXCSUM)) { - rc = qeth_set_ipa_csum(card, - features & NETIF_F_RXCSUM ? 
1 : 0, - IPA_INBOUND_CHECKSUM); + if (changed & NETIF_F_IPV6_CSUM) { + rc = qeth_set_ipa_csum(card, features & NETIF_F_IPV6_CSUM, + IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV6); + if (rc) + changed ^= NETIF_F_IPV6_CSUM; + } + if (changed & NETIF_F_RXCSUM) { + rc = qeth_set_ipa_rx_csum(card, features & NETIF_F_RXCSUM); if (rc) changed ^= NETIF_F_RXCSUM; } @@ -6491,7 +6528,10 @@ netdev_features_t qeth_fix_features(struct net_device *dev, QETH_DBF_TEXT(SETUP, 2, "fixfeat"); if (!qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) features &= ~NETIF_F_IP_CSUM; - if (!qeth_is_supported(card, IPA_INBOUND_CHECKSUM)) + if (!qeth_is_supported6(card, IPA_OUTBOUND_CHECKSUM_V6)) + features &= ~NETIF_F_IPV6_CSUM; + if (!qeth_is_supported(card, IPA_INBOUND_CHECKSUM) && + !qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6)) features &= ~NETIF_F_RXCSUM; if (!qeth_is_supported(card, IPA_OUTBOUND_TSO)) features &= ~NETIF_F_TSO; diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index f4d1ec0b8f5a..878e62f35169 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -246,6 +246,8 @@ enum qeth_ipa_funcs { IPA_QUERY_ARP_ASSIST = 0x00040000L, IPA_INBOUND_TSO = 0x00080000L, IPA_OUTBOUND_TSO = 0x00100000L, + IPA_INBOUND_CHECKSUM_V6 = 0x00400000L, + IPA_OUTBOUND_CHECKSUM_V6 = 0x00800000L, }; /* SETIP/DELIP IPA Command: ***************************************************/ diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index ae81534de912..c3f18afb368b 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -144,6 +144,8 @@ static ssize_t qeth_dev_portno_store(struct device *dev, goto out; } card->info.portno = portno; + if (card->dev) + card->dev->dev_port = portno; out: mutex_unlock(&card->conf_mutex); return rc ? 
rc : count; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index b8079f2a65b3..a7cb37da6a21 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -17,7 +17,6 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/etherdevice.h> -#include <linux/ip.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/hashtable.h> @@ -195,23 +194,6 @@ static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb) return RTN_UNSPEC; } -static void qeth_l2_hdr_csum(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb) -{ - struct iphdr *iph = ip_hdr(skb); - - /* tcph->check contains already the pseudo hdr checksum - * so just set the header flags - */ - if (iph->protocol == IPPROTO_UDP) - hdr->hdr.l2.flags[1] |= QETH_HDR_EXT_UDP; - hdr->hdr.l2.flags[1] |= QETH_HDR_EXT_CSUM_TRANSP_REQ | - QETH_HDR_EXT_CSUM_HDR_REQ; - iph->check = 0; - if (card->options.performance_stats) - card->perf_stats.tx_csum++; -} - static void qeth_l2_fill_header(struct qeth_hdr *hdr, struct sk_buff *skb, int cast_type, unsigned int data_len) { @@ -297,12 +279,13 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, static void qeth_l2_process_vlans(struct qeth_card *card) { struct qeth_vlan_vid *id; + QETH_CARD_TEXT(card, 3, "L2prcvln"); - spin_lock_bh(&card->vlanlock); + mutex_lock(&card->vid_list_mutex); list_for_each_entry(id, &card->vid_list, list) { qeth_l2_send_setdelvlan(card, id->vid, IPA_CMD_SETVLAN); } - spin_unlock_bh(&card->vlanlock); + mutex_unlock(&card->vid_list_mutex); } static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, @@ -319,7 +302,7 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, QETH_CARD_TEXT(card, 3, "aidREC"); return 0; } - id = kmalloc(sizeof(struct qeth_vlan_vid), GFP_ATOMIC); + id = kmalloc(sizeof(*id), GFP_KERNEL); if (id) { id->vid = vid; rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN); @@ -327,9 +310,9 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, kfree(id); return rc; } - spin_lock_bh(&card->vlanlock); + mutex_lock(&card->vid_list_mutex); list_add_tail(&id->list, &card->vid_list); - spin_unlock_bh(&card->vlanlock); + mutex_unlock(&card->vid_list_mutex); } else { return -ENOMEM; } @@ -348,7 +331,7 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, QETH_CARD_TEXT(card, 3, "kidREC"); return 0; } - spin_lock_bh(&card->vlanlock); + mutex_lock(&card->vid_list_mutex); list_for_each_entry(id, &card->vid_list, list) { if (id->vid == vid) { list_del(&id->list); @@ -356,7 +339,7 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, break; } } - spin_unlock_bh(&card->vlanlock); + mutex_unlock(&card->vid_list_mutex); if (tmpid) { rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN); kfree(tmpid); @@ -423,15 +406,7 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card, switch (hdr->hdr.l2.id) { case QETH_HEADER_TYPE_LAYER2: skb->protocol = eth_type_trans(skb, skb->dev); - if ((card->dev->features & NETIF_F_RXCSUM) - && ((hdr->hdr.l2.flags[1] & - (QETH_HDR_EXT_CSUM_HDR_REQ | - QETH_HDR_EXT_CSUM_TRANSP_REQ)) == - (QETH_HDR_EXT_CSUM_HDR_REQ | - QETH_HDR_EXT_CSUM_TRANSP_REQ))) - skb->ip_summed = CHECKSUM_UNNECESSARY; - else - skb->ip_summed = CHECKSUM_NONE; + qeth_rx_csum(card, skb, hdr->hdr.l2.flags[1]); if (skb->protocol == htons(ETH_P_802_2)) *((__u32 *)skb->cb) = ++card->seqno.pkt_seqno; len = skb->len; @@ -464,7 +439,6 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card, static int 
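/* The vid_list conversion above (BH spinlock -> mutex, GFP_ATOMIC ->
 * GFP_KERNEL) moves the whole VLAN bookkeeping into sleepable process
 * context, matching the blocking SETVLAN/DELVLAN IPA round trips issued
 * from the same paths. Resulting shape of the add path:
 *
 *	id = kmalloc(sizeof(*id), GFP_KERNEL);		(may sleep)
 *	rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN);
 *	mutex_lock(&card->vid_list_mutex);		(may sleep, no BH-off)
 *	list_add_tail(&id->list, &card->vid_list);
 *	mutex_unlock(&card->vid_list_mutex);
 */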
qeth_l2_request_initial_mac(struct qeth_card *card) { int rc = 0; - char vendor_pre[] = {0x02, 0x00, 0x00}; QETH_DBF_TEXT(SETUP, 2, "l2reqmac"); QETH_DBF_TEXT_(SETUP, 2, "doL2%s", CARD_BUS_ID(card)); @@ -484,16 +458,20 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card) card->info.type == QETH_CARD_TYPE_OSX || card->info.guestlan) { rc = qeth_setadpparms_change_macaddr(card); - if (rc) { - QETH_DBF_MESSAGE(2, "couldn't get MAC address on " - "device %s: x%x\n", CARD_BUS_ID(card), rc); - QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc); - return rc; - } - } else { - eth_random_addr(card->dev->dev_addr); - memcpy(card->dev->dev_addr, vendor_pre, 3); + if (!rc) + goto out; + QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %s: x%x\n", + CARD_BUS_ID(card), rc); + QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc); + /* fall back once more: */ } + + /* some devices don't support a custom MAC address: */ + if (card->info.type == QETH_CARD_TYPE_OSM || + card->info.type == QETH_CARD_TYPE_OSX) + return (rc) ? rc : -EADDRNOTAVAIL; + eth_hw_addr_random(card->dev); + out: QETH_DBF_HEX(SETUP, 2, card->dev->dev_addr, card->dev->addr_len); return 0; @@ -685,7 +663,8 @@ out: } static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb, - struct qeth_qdio_out_q *queue, int cast_type) + struct qeth_qdio_out_q *queue, int cast_type, + int ipv) { int push_len = sizeof(struct qeth_hdr); unsigned int elements, nr_frags; @@ -723,8 +702,11 @@ static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb, hdr_elements = 1; } qeth_l2_fill_header(hdr, skb, cast_type, skb->len - push_len); - if (skb->ip_summed == CHECKSUM_PARTIAL) - qeth_l2_hdr_csum(card, hdr, skb); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv); + if (card->options.performance_stats) + card->perf_stats.tx_csum++; + } elements = qeth_get_elements_no(card, skb, hdr_elements, 0); if (!elements) { @@ -776,6 +758,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, { struct qeth_card *card = dev->ml_priv; int cast_type = qeth_l2_get_cast_type(card, skb); + int ipv = qeth_get_ip_version(skb); struct qeth_qdio_out_q *queue; int tx_bytes = skb->len; int rc; @@ -783,7 +766,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, if (card->qdio.do_prio_queueing || (cast_type && card->info.is_multicast_different)) queue = card->qdio.out_qs[qeth_get_priority_queue(card, skb, - qeth_get_ip_version(skb), cast_type)]; + ipv, cast_type)]; else queue = card->qdio.out_qs[card->qdio.default_out_queue]; @@ -806,7 +789,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, rc = qeth_l2_xmit_iqd(card, skb, queue, cast_type); break; default: - rc = qeth_l2_xmit_osa(card, skb, queue, cast_type); + rc = qeth_l2_xmit_osa(card, skb, queue, cast_type, ipv); } if (!rc) { @@ -983,6 +966,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->mtu = card->info.initial_mtu; card->dev->min_mtu = 64; card->dev->max_mtu = ETH_MAX_MTU; + card->dev->dev_port = card->info.portno; card->dev->netdev_ops = &qeth_l2_netdev_ops; if (card->info.type == QETH_CARD_TYPE_OSN) { card->dev->ethtool_ops = &qeth_l2_osn_ops; @@ -1011,10 +995,15 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->hw_features |= NETIF_F_IP_CSUM; card->dev->vlan_features |= NETIF_F_IP_CSUM; } - if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM)) { - card->dev->hw_features |= NETIF_F_RXCSUM; - card->dev->vlan_features |= NETIF_F_RXCSUM; - } + } + if (qeth_is_supported6(card, 
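/* qeth_l2_request_initial_mac() above is now a fallback ladder: take the
 * address from READ_MAC where the card provides one; if that fails on a
 * card type that cannot run with a made-up address (OSM/OSX), give up;
 * everything else falls back to eth_hw_addr_random(), which also replaces
 * the old hand-rolled 02:00:00 locally-administered prefix. Condensed
 * control flow (helper names illustrative):
 *
 *	rc = read_mac_via_ipa(card);
 *	if (!rc)
 *		goto out;
 *	if (card_needs_hw_mac(card))		(OSM/OSX: no random fallback)
 *		return rc;
 *	eth_hw_addr_random(card->dev);
 *    out:
 *	...
 */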
IPA_OUTBOUND_CHECKSUM_V6)) { + card->dev->hw_features |= NETIF_F_IPV6_CSUM; + card->dev->vlan_features |= NETIF_F_IPV6_CSUM; + } + if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM) || + qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6)) { + card->dev->hw_features |= NETIF_F_RXCSUM; + card->dev->vlan_features |= NETIF_F_RXCSUM; } card->info.broadcast_capable = 1; @@ -1315,9 +1304,6 @@ static int qeth_l2_control_event(struct qeth_card *card, struct qeth_discipline qeth_l2_discipline = { .devtype = &qeth_l2_devtype, - .start_poll = qeth_qdio_start_poll, - .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, - .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, .process_rx_buffer = qeth_l2_process_inbound_buffer, .recover = qeth_l2_recover, .setup = qeth_l2_probe_device, diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c1a16a74aa83..e7fa479adf47 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -735,22 +735,6 @@ static int qeth_l3_setadapter_parms(struct qeth_card *card) return rc; } -static int qeth_l3_send_simple_setassparms_ipv6(struct qeth_card *card, - enum qeth_ipa_funcs ipa_func, __u16 cmd_code) -{ - int rc; - struct qeth_cmd_buffer *iob; - - QETH_CARD_TEXT(card, 4, "simassp6"); - iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, - 0, QETH_PROT_IPV6); - if (!iob) - return -ENOMEM; - rc = qeth_send_setassparms(card, iob, 0, 0, - qeth_setassparms_cb, NULL); - return rc; -} - static int qeth_l3_start_ipa_arp_processing(struct qeth_card *card) { int rc; @@ -851,14 +835,6 @@ static int qeth_l3_softsetup_ipv6(struct qeth_card *card) QETH_CARD_TEXT(card, 3, "softipv6"); - rc = qeth_query_ipassists(card, QETH_PROT_IPV6); - if (rc) { - dev_err(&card->gdev->dev, - "Activating IPv6 support for %s failed\n", - QETH_CARD_IFNAME(card)); - return rc; - } - if (card->info.type == QETH_CARD_TYPE_IQD) goto out; @@ -870,16 +846,16 @@ static int qeth_l3_softsetup_ipv6(struct qeth_card *card) QETH_CARD_IFNAME(card)); return rc; } - rc = qeth_l3_send_simple_setassparms_ipv6(card, IPA_IPV6, - IPA_CMD_ASS_START); + rc = qeth_send_simple_setassparms_v6(card, IPA_IPV6, + IPA_CMD_ASS_START, 0); if (rc) { dev_err(&card->gdev->dev, "Activating IPv6 support for %s failed\n", QETH_CARD_IFNAME(card)); return rc; } - rc = qeth_l3_send_simple_setassparms_ipv6(card, IPA_PASSTHRU, - IPA_CMD_ASS_START); + rc = qeth_send_simple_setassparms_v6(card, IPA_PASSTHRU, + IPA_CMD_ASS_START, 0); if (rc) { dev_warn(&card->gdev->dev, "Enabling the passthrough mode for %s failed\n", @@ -1293,91 +1269,6 @@ static void qeth_l3_add_multicast_ipv6(struct qeth_card *card) in6_dev_put(in6_dev); } -static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, - unsigned short vid) -{ - struct in_device *in_dev; - struct in_ifaddr *ifa; - struct qeth_ipaddr *addr; - struct net_device *netdev; - - QETH_CARD_TEXT(card, 4, "frvaddr4"); - - netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); - if (!netdev) - return; - in_dev = in_dev_get(netdev); - if (!in_dev) - return; - - addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4); - if (!addr) - goto out; - - spin_lock_bh(&card->ip_lock); - - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { - addr->u.a4.addr = be32_to_cpu(ifa->ifa_address); - addr->u.a4.mask = be32_to_cpu(ifa->ifa_mask); - addr->type = QETH_IP_TYPE_NORMAL; - qeth_l3_delete_ip(card, addr); - } - - spin_unlock_bh(&card->ip_lock); - - kfree(addr); -out: - in_dev_put(in_dev); -} - -static void 
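/* In qeth_l2_setup_netdev() above, NETIF_F_RXCSUM is advertised if either
 * the IPv4 or the IPv6 inbound Assist exists, mirroring the OR-logic in
 * qeth_fix_features()/qeth_set_ipa_rx_csum(): user space sees one RX
 * checksum knob and the driver maps it onto whatever the card offers.
 * Condensed (helper names illustrative):
 *
 *	if (rx_csum_v4_supported(card) || rx_csum_v6_supported(card)) {
 *		dev->hw_features   |= NETIF_F_RXCSUM;
 *		dev->vlan_features |= NETIF_F_RXCSUM;
 *	}
 */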
qeth_l3_free_vlan_addresses6(struct qeth_card *card, - unsigned short vid) -{ - struct inet6_dev *in6_dev; - struct inet6_ifaddr *ifa; - struct qeth_ipaddr *addr; - struct net_device *netdev; - - QETH_CARD_TEXT(card, 4, "frvaddr6"); - - netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); - if (!netdev) - return; - - in6_dev = in6_dev_get(netdev); - if (!in6_dev) - return; - - addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6); - if (!addr) - goto out; - - spin_lock_bh(&card->ip_lock); - - list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { - memcpy(&addr->u.a6.addr, &ifa->addr, - sizeof(struct in6_addr)); - addr->u.a6.pfxlen = ifa->prefix_len; - addr->type = QETH_IP_TYPE_NORMAL; - qeth_l3_delete_ip(card, addr); - } - - spin_unlock_bh(&card->ip_lock); - - kfree(addr); -out: - in6_dev_put(in6_dev); -} - -static void qeth_l3_free_vlan_addresses(struct qeth_card *card, - unsigned short vid) -{ - rcu_read_lock(); - qeth_l3_free_vlan_addresses4(card, vid); - qeth_l3_free_vlan_addresses6(card, vid); - rcu_read_unlock(); -} - static int qeth_l3_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { @@ -1398,8 +1289,6 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, QETH_CARD_TEXT(card, 3, "kidREC"); return 0; } - /* unregister IP addresses of vlan device */ - qeth_l3_free_vlan_addresses(card, vid); clear_bit(vid, card->active_vlans); qeth_l3_set_rx_mode(dev); return 0; @@ -1454,17 +1343,7 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag); } - if (card->dev->features & NETIF_F_RXCSUM) { - if ((hdr->hdr.l3.ext_flags & - (QETH_HDR_EXT_CSUM_HDR_REQ | - QETH_HDR_EXT_CSUM_TRANSP_REQ)) == - (QETH_HDR_EXT_CSUM_HDR_REQ | - QETH_HDR_EXT_CSUM_TRANSP_REQ)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - else - skb->ip_summed = CHECKSUM_NONE; - } else - skb->ip_summed = CHECKSUM_NONE; + qeth_rx_csum(card, skb, hdr->hdr.l3.ext_flags); } static int qeth_l3_process_inbound_buffer(struct qeth_card *card, @@ -2210,23 +2089,6 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, rcu_read_unlock(); } -static void qeth_l3_hdr_csum(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb) -{ - struct iphdr *iph = ip_hdr(skb); - - /* tcph->check contains already the pseudo hdr checksum - * so just set the header flags - */ - if (iph->protocol == IPPROTO_UDP) - hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_UDP; - hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_CSUM_TRANSP_REQ | - QETH_HDR_EXT_CSUM_HDR_REQ; - iph->check = 0; - if (card->options.performance_stats) - card->perf_stats.tx_csum++; -} - static void qeth_tso_fill_header(struct qeth_card *card, struct qeth_hdr *qhdr, struct sk_buff *skb) { @@ -2418,8 +2280,11 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, } } - if (skb->ip_summed == CHECKSUM_PARTIAL) - qeth_l3_hdr_csum(card, hdr, new_skb); + if (new_skb->ip_summed == CHECKSUM_PARTIAL) { + qeth_tx_csum(new_skb, &hdr->hdr.l3.ext_flags, ipv); + if (card->options.performance_stats) + card->perf_stats.tx_csum++; + } } elements = use_tso ? 
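/* Both disciplines now funnel RX checksum handling through a single
 * qeth_rx_csum() helper. Its body is not part of these hunks, but judging
 * from the open-coded checks it replaces (layer-2 flags[1], layer-3
 * ext_flags), it is presumably equivalent to:
 *
 *	static void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
 *				 u8 flags)
 *	{
 *		u8 csum = QETH_HDR_EXT_CSUM_HDR_REQ |
 *			  QETH_HDR_EXT_CSUM_TRANSP_REQ;
 *
 *		if ((card->dev->features & NETIF_F_RXCSUM) &&
 *		    (flags & csum) == csum)
 *			skb->ip_summed = CHECKSUM_UNNECESSARY;
 *		else
 *			skb->ip_summed = CHECKSUM_NONE;
 *	}
 */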
@@ -2620,28 +2485,32 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) (card->info.link_type == QETH_LINK_TYPE_HSTR)) { pr_info("qeth_l3: ignoring TR device\n"); return -ENODEV; - } else { - card->dev = alloc_etherdev(0); - if (!card->dev) - return -ENODEV; - card->dev->netdev_ops = &qeth_l3_osa_netdev_ops; - - /*IPv6 address autoconfiguration stuff*/ - qeth_l3_get_unique_id(card); - if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD)) - card->dev->dev_id = card->info.unique_id & - 0xffff; - - card->dev->hw_features |= NETIF_F_SG; - card->dev->vlan_features |= NETIF_F_SG; - - if (!card->info.guestlan) { - card->dev->features |= NETIF_F_SG; - card->dev->hw_features |= NETIF_F_TSO | - NETIF_F_RXCSUM | NETIF_F_IP_CSUM; - card->dev->vlan_features |= NETIF_F_TSO | - NETIF_F_RXCSUM | NETIF_F_IP_CSUM; - } + } + + card->dev = alloc_etherdev(0); + if (!card->dev) + return -ENODEV; + card->dev->netdev_ops = &qeth_l3_osa_netdev_ops; + + /*IPv6 address autoconfiguration stuff*/ + qeth_l3_get_unique_id(card); + if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD)) + card->dev->dev_id = card->info.unique_id & 0xffff; + + card->dev->hw_features |= NETIF_F_SG; + card->dev->vlan_features |= NETIF_F_SG; + + if (!card->info.guestlan) { + card->dev->features |= NETIF_F_SG; + card->dev->hw_features |= NETIF_F_TSO | + NETIF_F_RXCSUM | NETIF_F_IP_CSUM; + card->dev->vlan_features |= NETIF_F_TSO | + NETIF_F_RXCSUM | NETIF_F_IP_CSUM; + } + + if (qeth_is_supported6(card, IPA_OUTBOUND_CHECKSUM_V6)) { + card->dev->hw_features |= NETIF_F_IPV6_CSUM; + card->dev->vlan_features |= NETIF_F_IPV6_CSUM; } } else if (card->info.type == QETH_CARD_TYPE_IQD) { card->dev = alloc_netdev(0, "hsi%d", NET_NAME_UNKNOWN, @@ -2663,6 +2532,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->mtu = card->info.initial_mtu; card->dev->min_mtu = 64; card->dev->max_mtu = ETH_MAX_MTU; + card->dev->dev_port = card->info.portno; card->dev->ethtool_ops = &qeth_l3_ethtool_ops; card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | @@ -2960,9 +2830,6 @@ static int qeth_l3_control_event(struct qeth_card *card, struct qeth_discipline qeth_l3_discipline = { .devtype = &qeth_l3_devtype, - .start_poll = qeth_qdio_start_poll, - .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, - .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, .process_rx_buffer = qeth_l3_process_inbound_buffer, .recover = qeth_l3_recover, .setup = qeth_l3_probe_device, diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c index 026182d3b27c..224d7ddeeb76 100644 --- a/drivers/soc/ti/knav_dma.c +++ b/drivers/soc/ti/knav_dma.c @@ -134,6 +134,13 @@ struct knav_dma_chan { static struct knav_dma_pool_device *kdev; +static bool device_ready; +bool knav_dma_device_ready(void) +{ + return device_ready; +} +EXPORT_SYMBOL_GPL(knav_dma_device_ready); + static bool check_config(struct knav_dma_chan *chan, struct knav_dma_cfg *cfg) { if (!memcmp(&chan->cfg, cfg, sizeof(*cfg))) @@ -773,6 +780,7 @@ static int knav_dma_probe(struct platform_device *pdev) debugfs_create_file("knav_dma", S_IFREG | S_IRUGO, NULL, NULL, &knav_dma_debug_ops); + device_ready = true; return ret; } diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h index 905b974d1bdc..56866ba4cfc4 100644 --- a/drivers/soc/ti/knav_qmss.h +++ b/drivers/soc/ti/knav_qmss.h @@ -292,6 +292,11 @@ struct knav_queue { struct list_head list; }; +enum qmss_version { + QMSS, + QMSS_66AK2G, +}; + struct knav_device { struct device *dev; unsigned base_id; @@ 
-305,6 +310,7 @@ struct knav_device { struct list_head pools; struct list_head pdsps; struct list_head qmgrs; + enum qmss_version version; }; struct knav_range_ops { diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c index 77d6b5c03aae..419365a8d1c2 100644 --- a/drivers/soc/ti/knav_qmss_queue.c +++ b/drivers/soc/ti/knav_qmss_queue.c @@ -42,6 +42,15 @@ static DEFINE_MUTEX(knav_dev_lock); #define KNAV_QUEUE_PUSH_REG_INDEX 4 #define KNAV_QUEUE_POP_REG_INDEX 5 +/* Queue manager register indices in DTS for QMSS in K2G NAVSS. + * There are no status and vbusm push registers on this version + * of QMSS. Push registers are same as pop, So all indices above 1 + * are to be re-defined + */ +#define KNAV_L_QUEUE_CONFIG_REG_INDEX 1 +#define KNAV_L_QUEUE_REGION_REG_INDEX 2 +#define KNAV_L_QUEUE_PUSH_REG_INDEX 3 + /* PDSP register indices in DTS */ #define KNAV_QUEUE_PDSP_IRAM_REG_INDEX 0 #define KNAV_QUEUE_PDSP_REGS_REG_INDEX 1 @@ -65,6 +74,13 @@ static DEFINE_MUTEX(knav_dev_lock); */ const char *knav_acc_firmwares[] = {"ks2_qmss_pdsp_acc48.bin"}; +static bool device_ready; +bool knav_qmss_device_ready(void) +{ + return device_ready; +} +EXPORT_SYMBOL_GPL(knav_qmss_device_ready); + /** * knav_queue_notify: qmss queue notfier call * @@ -1169,8 +1185,12 @@ static int knav_queue_setup_link_ram(struct knav_device *kdev) dev_dbg(kdev->dev, "linkram0: dma:%pad, virt:%p, size:%x\n", &block->dma, block->virt, block->size); writel_relaxed((u32)block->dma, &qmgr->reg_config->link_ram_base0); - writel_relaxed(block->size, &qmgr->reg_config->link_ram_size0); - + if (kdev->version == QMSS_66AK2G) + writel_relaxed(block->size, + &qmgr->reg_config->link_ram_size0); + else + writel_relaxed(block->size - 1, + &qmgr->reg_config->link_ram_size0); block++; if (!block->size) continue; @@ -1387,42 +1407,64 @@ static int knav_queue_init_qmgrs(struct knav_device *kdev, qmgr->reg_peek = knav_queue_map_reg(kdev, child, KNAV_QUEUE_PEEK_REG_INDEX); - qmgr->reg_status = - knav_queue_map_reg(kdev, child, - KNAV_QUEUE_STATUS_REG_INDEX); + + if (kdev->version == QMSS) { + qmgr->reg_status = + knav_queue_map_reg(kdev, child, + KNAV_QUEUE_STATUS_REG_INDEX); + } + qmgr->reg_config = knav_queue_map_reg(kdev, child, + (kdev->version == QMSS_66AK2G) ? + KNAV_L_QUEUE_CONFIG_REG_INDEX : KNAV_QUEUE_CONFIG_REG_INDEX); qmgr->reg_region = knav_queue_map_reg(kdev, child, + (kdev->version == QMSS_66AK2G) ? + KNAV_L_QUEUE_REGION_REG_INDEX : KNAV_QUEUE_REGION_REG_INDEX); + qmgr->reg_push = knav_queue_map_reg(kdev, child, - KNAV_QUEUE_PUSH_REG_INDEX); - qmgr->reg_pop = - knav_queue_map_reg(kdev, child, - KNAV_QUEUE_POP_REG_INDEX); + (kdev->version == QMSS_66AK2G) ? 
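/* K2G exposes only four register ranges (no status range, and push and pop
 * share one range), so the version switch in knav_queue_init_qmgrs() is
 * really an index-set selection. Condensed view, where map(i) abbreviates
 * knav_queue_map_reg(kdev, child, i), and reg_status is only mapped on the
 * original QMSS:
 *
 *	bool k2g = (kdev->version == QMSS_66AK2G);
 *
 *	qmgr->reg_peek   = map(KNAV_QUEUE_PEEK_REG_INDEX);
 *	qmgr->reg_config = map(k2g ? KNAV_L_QUEUE_CONFIG_REG_INDEX
 *				   : KNAV_QUEUE_CONFIG_REG_INDEX);
 *	qmgr->reg_region = map(k2g ? KNAV_L_QUEUE_REGION_REG_INDEX
 *				   : KNAV_QUEUE_REGION_REG_INDEX);
 *	qmgr->reg_push   = map(k2g ? KNAV_L_QUEUE_PUSH_REG_INDEX
 *				   : KNAV_QUEUE_PUSH_REG_INDEX);
 *	qmgr->reg_pop    = k2g ? qmgr->reg_push
 *			       : map(KNAV_QUEUE_POP_REG_INDEX);
 */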
+ KNAV_L_QUEUE_PUSH_REG_INDEX : + KNAV_QUEUE_PUSH_REG_INDEX); + + if (kdev->version == QMSS) { + qmgr->reg_pop = + knav_queue_map_reg(kdev, child, + KNAV_QUEUE_POP_REG_INDEX); + } - if (IS_ERR(qmgr->reg_peek) || IS_ERR(qmgr->reg_status) || + if (IS_ERR(qmgr->reg_peek) || + ((kdev->version == QMSS) && + (IS_ERR(qmgr->reg_status) || IS_ERR(qmgr->reg_pop))) || IS_ERR(qmgr->reg_config) || IS_ERR(qmgr->reg_region) || - IS_ERR(qmgr->reg_push) || IS_ERR(qmgr->reg_pop)) { + IS_ERR(qmgr->reg_push)) { dev_err(dev, "failed to map qmgr regs\n"); + if (kdev->version == QMSS) { + if (!IS_ERR(qmgr->reg_status)) + devm_iounmap(dev, qmgr->reg_status); + if (!IS_ERR(qmgr->reg_pop)) + devm_iounmap(dev, qmgr->reg_pop); + } if (!IS_ERR(qmgr->reg_peek)) devm_iounmap(dev, qmgr->reg_peek); - if (!IS_ERR(qmgr->reg_status)) - devm_iounmap(dev, qmgr->reg_status); if (!IS_ERR(qmgr->reg_config)) devm_iounmap(dev, qmgr->reg_config); if (!IS_ERR(qmgr->reg_region)) devm_iounmap(dev, qmgr->reg_region); if (!IS_ERR(qmgr->reg_push)) devm_iounmap(dev, qmgr->reg_push); - if (!IS_ERR(qmgr->reg_pop)) - devm_iounmap(dev, qmgr->reg_pop); devm_kfree(dev, qmgr); continue; } + /* Use same push register for pop as well */ + if (kdev->version == QMSS_66AK2G) + qmgr->reg_pop = qmgr->reg_push; + list_add_tail(&qmgr->list, &kdev->qmgrs); dev_info(dev, "added qmgr start queue %d, num of queues %d, reg_peek %p, reg_status %p, reg_config %p, reg_region %p, reg_push %p, reg_pop %p\n", qmgr->start_queue, qmgr->num_queues, @@ -1681,10 +1723,24 @@ static int knav_queue_init_queues(struct knav_device *kdev) return 0; } +/* Match table for of_platform binding */ +static const struct of_device_id keystone_qmss_of_match[] = { + { + .compatible = "ti,keystone-navigator-qmss", + }, + { + .compatible = "ti,66ak2g-navss-qm", + .data = (void *)QMSS_66AK2G, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, keystone_qmss_of_match); + static int knav_queue_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; struct device_node *qmgrs, *queue_pools, *regions, *pdsps; + const struct of_device_id *match; struct device *dev = &pdev->dev; u32 temp[2]; int ret; @@ -1700,6 +1756,10 @@ static int knav_queue_probe(struct platform_device *pdev) return -ENOMEM; } + match = of_match_device(of_match_ptr(keystone_qmss_of_match), dev); + if (match && match->data) + kdev->version = QMSS_66AK2G; + platform_set_drvdata(pdev, kdev); kdev->dev = dev; INIT_LIST_HEAD(&kdev->queue_ranges); @@ -1796,6 +1856,7 @@ static int knav_queue_probe(struct platform_device *pdev) debugfs_create_file("qmss", S_IFREG | S_IRUGO, NULL, NULL, &knav_queue_debug_ops); + device_ready = true; return 0; err: @@ -1815,13 +1876,6 @@ static int knav_queue_remove(struct platform_device *pdev) return 0; } -/* Match table for of_platform binding */ -static struct of_device_id keystone_qmss_of_match[] = { - { .compatible = "ti,keystone-navigator-qmss", }, - {}, -}; -MODULE_DEVICE_TABLE(of, keystone_qmss_of_match); - static struct platform_driver keystone_qmss_driver = { .probe = knav_queue_probe, .remove = knav_queue_remove, diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 986058a57917..c4b49fca4871 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -32,6 +32,7 @@ #include <linux/skbuff.h> #include <net/sock.h> +#include <net/xdp.h> #include "vhost.h" @@ -45,8 +46,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" #define VHOST_NET_WEIGHT 0x80000 /* Max number of packets transferred before requeueing the job. 
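 * A byte quota alone still lets a stream of tiny packets keep one
 * virtqueue busy far beyond its fair share, so the limit below is paired
 * with a packet quota on both the TX and RX paths; a run ends as soon as
 * either trips:
 *
 *	if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
 *	    unlikely(++pkts >= VHOST_NET_PKT_WEIGHT)) {
 *		vhost_poll_queue(&vq->poll);
 *		break;		(the RX side uses "goto out")
 *	}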
- * Using this limit prevents one virtqueue from starving rx. */ -#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2) + * Using this limit prevents one virtqueue from starving others with small + * pkts. + */ +#define VHOST_NET_PKT_WEIGHT 256 /* MAX number of TX used buffers for outstanding zerocopy */ #define VHOST_MAX_PEND 128 @@ -181,10 +184,10 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) static int vhost_net_buf_peek_len(void *ptr) { - if (tun_is_xdp_buff(ptr)) { - struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + if (tun_is_xdp_frame(ptr)) { + struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); - return xdp->data_end - xdp->data; + return xdpf->len; } return __skb_array_len_with_tag(ptr); @@ -586,7 +589,7 @@ static void handle_tx(struct vhost_net *net) vhost_zerocopy_signal_used(net, vq); vhost_net_tx_packet(net); if (unlikely(total_len >= VHOST_NET_WEIGHT) || - unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) { + unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT)) { vhost_poll_queue(&vq->poll); break; } @@ -768,6 +771,7 @@ static void handle_rx(struct vhost_net *net) struct socket *sock; struct iov_iter fixup; __virtio16 num_buffers; + int recv_pkts = 0; mutex_lock_nested(&vq->mutex, 0); sock = vq->private_data; @@ -871,7 +875,8 @@ static void handle_rx(struct vhost_net *net) if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len); total_len += vhost_len; - if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + if (unlikely(total_len >= VHOST_NET_WEIGHT) || + unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) { vhost_poll_queue(&vq->poll); goto out; } diff --git a/include/dt-bindings/net/microchip-lan78xx.h b/include/dt-bindings/net/microchip-lan78xx.h new file mode 100644 index 000000000000..0742ff075307 --- /dev/null +++ b/include/dt-bindings/net/microchip-lan78xx.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DT_BINDINGS_MICROCHIP_LAN78XX_H +#define _DT_BINDINGS_MICROCHIP_LAN78XX_H + +/* LED modes for LAN7800/LAN7850 embedded PHY */ + +#define LAN78XX_LINK_ACTIVITY 0 +#define LAN78XX_LINK_1000_ACTIVITY 1 +#define LAN78XX_LINK_100_ACTIVITY 2 +#define LAN78XX_LINK_10_ACTIVITY 3 +#define LAN78XX_LINK_100_1000_ACTIVITY 4 +#define LAN78XX_LINK_10_1000_ACTIVITY 5 +#define LAN78XX_LINK_10_100_ACTIVITY 6 +#define LAN78XX_DUPLEX_COLLISION 8 +#define LAN78XX_COLLISION 9 +#define LAN78XX_ACTIVITY 10 +#define LAN78XX_AUTONEG_FAULT 12 +#define LAN78XX_FORCE_LED_OFF 14 +#define LAN78XX_FORCE_LED_ON 15 + +#endif diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 469b20e1dd7e..38ebbc61ed99 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -22,6 +22,8 @@ struct perf_event; struct bpf_prog; struct bpf_map; struct sock; +struct seq_file; +struct btf; /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { @@ -44,10 +46,14 @@ struct bpf_map_ops { void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); + void (*map_seq_show_elem)(struct bpf_map *map, void *key, + struct seq_file *m); + int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf, + u32 key_type_id, u32 value_type_id); }; struct bpf_map { - /* 1st cacheline with read-mostly members of which some + /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). 
*/ const struct bpf_map_ops *ops ____cacheline_aligned; @@ -63,10 +69,13 @@ struct bpf_map { u32 pages; u32 id; int numa_node; + u32 btf_key_id; + u32 btf_value_id; + struct btf *btf; bool unpriv_array; - /* 7 bytes hole */ + /* 55 bytes hole */ - /* 2nd cacheline with misc members to avoid false sharing + /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ struct user_struct *user ____cacheline_aligned; @@ -101,6 +110,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map) return container_of(map, struct bpf_offloaded_map, map); } +static inline bool bpf_map_support_seq_show(const struct bpf_map *map) +{ + return map->ops->map_seq_show_elem && map->ops->map_check_btf; +} + extern const struct bpf_map_ops bpf_map_offload_ops; /* function argument constraints */ diff --git a/include/linux/btf.h b/include/linux/btf.h new file mode 100644 index 000000000000..a966dc6d61ee --- /dev/null +++ b/include/linux/btf.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ + +#ifndef _LINUX_BTF_H +#define _LINUX_BTF_H 1 + +#include <linux/types.h> + +struct btf; +struct btf_type; +union bpf_attr; + +extern const struct file_operations btf_fops; + +void btf_put(struct btf *btf); +int btf_new_fd(const union bpf_attr *attr); +struct btf *btf_get_by_fd(int fd); +int btf_get_info_by_fd(const struct btf *btf, + const union bpf_attr *attr, + union bpf_attr __user *uattr); +/* Figure out the size of a type_id. If type_id is a modifier + * (e.g. const), it will be resolved to find out the type with size. + * + * For example: + * In describing "const void *", type_id is "const" and "const" + * refers to "void *". The return type will be "void *". + * + * If type_id is a simple "int", then return type will be "int". + * + * @btf: struct btf object + * @type_id: Find out the size of type_id. The type_id of the return + * type is set to *type_id. + * @ret_size: It can be NULL. If not NULL, the size of the return + * type is set to *ret_size. + * Return: The btf_type (resolved to another type with size info if needed). + * NULL is returned if type_id itself does not have size info + * (e.g. void) or it cannot be resolved to another type that + * has size info. + * *type_id and *ret_size will not be changed in the + * NULL return case. + */ +const struct btf_type *btf_type_id_size(const struct btf *btf, + u32 *type_id, + u32 *ret_size); +void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, + struct seq_file *m); + +#endif diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b32cd2062f18..f8a2245b70ac 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -312,6 +312,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * by kernel. Returns a negative error code or zero. * @get_fecparam: Get the network device Forward Error Correction parameters. * @set_fecparam: Set the network device Forward Error Correction parameters. + * @get_ethtool_phy_stats: Return extended statistics about the PHY device. + * This is only useful if the device maintains PHY statistics and + * cannot use the standard PHY library helpers. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. 
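 * For @get_ethtool_phy_stats, the inline phy_ethtool_get_stats() helpers
 * added to phy.h in this series are explicitly marked for use within
 * net/core/ethtool.c, so the intended split is presumably: the ethtool
 * core serves PHY statistics through the PHY library by default, and only
 * drivers that keep their own PHY counters implement this op. Core-side
 * dispatch under that assumption:
 *
 *	if (dev->phydev && !ops->get_ethtool_phy_stats)
 *		phy_ethtool_get_stats(dev->phydev, stats, data);
 *	else
 *		ops->get_ethtool_phy_stats(dev, stats, data);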
Callers must @@ -407,5 +410,7 @@ struct ethtool_ops { struct ethtool_fecparam *); int (*set_fecparam)(struct net_device *, struct ethtool_fecparam *); + void (*get_ethtool_phy_stats)(struct net_device *, + struct ethtool_stats *, u64 *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index fc4e8f91b03d..4da8b2308174 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -30,6 +30,7 @@ struct sock; struct seccomp_data; struct bpf_prog_aux; struct xdp_rxq_info; +struct xdp_buff; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function @@ -500,14 +501,6 @@ struct bpf_skb_data_end { void *data_end; }; -struct xdp_buff { - void *data; - void *data_end; - void *data_meta; - void *data_hard_start; - struct xdp_rxq_info *rxq; -}; - struct sk_msg_buff { void *data; void *data_end; @@ -772,21 +765,6 @@ int xdp_do_redirect(struct net_device *dev, struct bpf_prog *prog); void xdp_do_flush_map(void); -/* Drivers not supporting XDP metadata can use this helper, which - * rejects any room expansion for metadata as a result. - */ -static __always_inline void -xdp_set_data_meta_invalid(struct xdp_buff *xdp) -{ - xdp->data_meta = xdp->data + 1; -} - -static __always_inline bool -xdp_data_meta_unsupported(const struct xdp_buff *xdp) -{ - return unlikely(xdp->data_meta > xdp->data); -} - void bpf_warn_invalid_xdp_action(u32 act); struct sock *do_sk_redirect_map(struct sk_buff *skb); diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 02639ebea2f0..585d27182425 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -93,11 +93,39 @@ static inline bool br_multicast_router(const struct net_device *dev) #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) bool br_vlan_enabled(const struct net_device *dev); +int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid); +int br_vlan_get_info(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo); #else static inline bool br_vlan_enabled(const struct net_device *dev) { return false; } + +static inline int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) +{ + return -1; +} + +static inline int br_vlan_get_info(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo) +{ + return -1; +} +#endif + +#if IS_ENABLED(CONFIG_BRIDGE) +struct net_device *br_fdb_find_port(const struct net_device *br_dev, + const unsigned char *addr, + __u16 vid); +#else +static inline struct net_device * +br_fdb_find_port(const struct net_device *br_dev, + const unsigned char *addr, + __u16 vid) +{ + return NULL; +} #endif #endif diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 4cb7aeeafce0..2e55e4cdbd8a 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -21,7 +21,7 @@ struct macvlan_dev { struct hlist_node hlist; struct macvlan_port *port; struct net_device *lowerdev; - void *fwd_priv; + void *accel_priv; struct vlan_pcpu_stats __percpu *pcpu_stats; DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); @@ -61,10 +61,6 @@ extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); -extern void macvlan_count_rx(const struct macvlan_dev *vlan, - unsigned int len, bool success, - bool multicast); - extern void macvlan_dellink(struct net_device *dev, struct list_head *head); extern int macvlan_link_register(struct 
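/* The br_vlan_get_pvid()/br_vlan_get_info()/br_fdb_find_port() accessors
 * above use the usual conditional-stub pattern: with CONFIG_BRIDGE (or
 * bridge VLAN filtering) compiled out, callers still link and simply see
 * "not a bridge / not found" results, so no #ifdef is needed at the call
 * site:
 *
 *	struct net_device *port;
 *
 *	port = br_fdb_find_port(br_dev, addr, vid);
 *	if (!port)
 *		return;		(stub, or address unknown to the bridge)
 */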
rtnl_link_ops *ops); @@ -86,4 +82,27 @@ macvlan_dev_real_dev(const struct net_device *dev) } #endif +static inline void *macvlan_accel_priv(struct net_device *dev) +{ + struct macvlan_dev *macvlan = netdev_priv(dev); + + return macvlan->accel_priv; +} + +static inline bool macvlan_supports_dest_filter(struct net_device *dev) +{ + struct macvlan_dev *macvlan = netdev_priv(dev); + + return macvlan->mode == MACVLAN_MODE_PRIVATE || + macvlan->mode == MACVLAN_MODE_VEPA || + macvlan->mode == MACVLAN_MODE_BRIDGE; +} + +static inline int macvlan_release_l2fw_offload(struct net_device *dev) +{ + struct macvlan_dev *macvlan = netdev_priv(dev); + + macvlan->accel_priv = NULL; + return dev_uc_add(macvlan->lowerdev, dev->dev_addr); +} #endif /* _LINUX_IF_MACVLAN_H */ diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index fd00170b494f..3d2996dc7d85 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -22,7 +22,7 @@ #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); -bool tun_is_xdp_buff(void *ptr); +bool tun_is_xdp_frame(void *ptr); void *tun_xdp_to_ptr(void *ptr); void *tun_ptr_to_xdp(void *ptr); void tun_ptr_free(void *ptr); @@ -39,7 +39,7 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f) { return ERR_PTR(-EINVAL); } -static inline bool tun_is_xdp_buff(void *ptr) +static inline bool tun_is_xdp_frame(void *ptr) { return false; } diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h index a8ac9cfa014c..5d71e8a8500f 100644 --- a/include/linux/mdio-bitbang.h +++ b/include/linux/mdio-bitbang.h @@ -33,8 +33,6 @@ struct mdiobb_ops { struct mdiobb_ctrl { const struct mdiobb_ops *ops; - /* reset callback */ - int (*reset)(struct mii_bus *bus); }; /* The returned bus is not yet registered with the phy layer. 
*/ diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h new file mode 100644 index 000000000000..cea443a672cb --- /dev/null +++ b/include/linux/mdio-gpio.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_MDIO_GPIO_H +#define __LINUX_MDIO_GPIO_H + +#define MDIO_GPIO_MDC 0 +#define MDIO_GPIO_MDIO 1 +#define MDIO_GPIO_MDO 2 + +#endif diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h index 8f9c90379732..8c40128af240 100644 --- a/include/linux/microchipphy.h +++ b/include/linux/microchipphy.h @@ -70,6 +70,9 @@ #define LAN88XX_MMD3_CHIP_ID (32877) #define LAN88XX_MMD3_CHIP_REV (32878) +/* Registers specific to the LAN7800/LAN7850 embedded phy */ +#define LAN78XX_PHY_LED_MODE_SELECT (0x1D) + /* DSP registers */ #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG (0x806A) #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_ (0x2000) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1aad455538f4..b8918a1da11f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -356,22 +356,6 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 reserved_at_6[0x1a]; }; -struct mlx5_ifc_ipv4_layout_bits { - u8 reserved_at_0[0x60]; - - u8 ipv4[0x20]; -}; - -struct mlx5_ifc_ipv6_layout_bits { - u8 ipv6[16][0x8]; -}; - -union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { - struct mlx5_ifc_ipv6_layout_bits ipv6_layout; - struct mlx5_ifc_ipv4_layout_bits ipv4_layout; - u8 reserved_at_0[0x80]; -}; - struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 smac_47_16[0x20]; diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index ec052491ba3d..193091537cb6 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -32,12 +32,29 @@ #ifndef MLX5_IFC_FPGA_H #define MLX5_IFC_FPGA_H +struct mlx5_ifc_ipv4_layout_bits { + u8 reserved_at_0[0x60]; + + u8 ipv4[0x20]; +}; + +struct mlx5_ifc_ipv6_layout_bits { + u8 ipv6[16][0x8]; +}; + +union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { + struct mlx5_ifc_ipv6_layout_bits ipv6_layout; + struct mlx5_ifc_ipv4_layout_bits ipv4_layout; + u8 reserved_at_0[0x80]; +}; + enum { MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9, }; enum { MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2, + MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS = 0x3, }; struct mlx5_ifc_fpga_shell_caps_bits { @@ -370,6 +387,27 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_tls_extended_cap_bits { + u8 aes_gcm_128[0x1]; + u8 aes_gcm_256[0x1]; + u8 reserved_at_2[0x1e]; + u8 reserved_at_20[0x20]; + u8 context_capacity_total[0x20]; + u8 context_capacity_rx[0x20]; + u8 context_capacity_tx[0x20]; + u8 reserved_at_a0[0x10]; + u8 tls_counter_size[0x10]; + u8 tls_counters_addr_low[0x20]; + u8 tls_counters_addr_high[0x20]; + u8 rx[0x1]; + u8 tx[0x1]; + u8 tls_v12[0x1]; + u8 tls_v13[0x1]; + u8 lro[0x1]; + u8 ipv6[0x1]; + u8 reserved_at_106[0x1a]; +}; + struct mlx5_ifc_ipsec_extended_cap_bits { u8 encapsulation[0x20]; @@ -519,4 +557,43 @@ struct mlx5_ifc_fpga_ipsec_sa { __be16 reserved2; } __packed; +enum fpga_tls_cmds { + CMD_SETUP_STREAM = 0x1001, + CMD_TEARDOWN_STREAM = 0x1002, +}; + +#define MLX5_TLS_1_2 (0) + +#define MLX5_TLS_ALG_AES_GCM_128 (0) +#define MLX5_TLS_ALG_AES_GCM_256 (1) + +struct mlx5_ifc_tls_cmd_bits { + u8 command_type[0x20]; + u8 ipv6[0x1]; + u8 direction_sx[0x1]; + u8 tls_version[0x2]; + u8 reserved[0x1c]; + u8 swid[0x20]; + u8 src_port[0x10]; + u8 dst_port[0x10]; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits 
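/* The layout union used for the address fields just below overlays the
 * 128-bit IPv6 address with an IPv4 address in the last four bytes
 * (reserved_at_0[0x60] followed by ipv4[0x20]), i.e. where an IPv4-mapped
 * IPv6 address carries it. A hedged sketch of filling a v4 command via the
 * usual MLX5_SET accessors (field names as in mlx5_ifc_tls_cmd_bits;
 * cmd/sport/saddr4 are illustrative):
 *
 *	MLX5_SET(tls_cmd, cmd, ipv6, 0);
 *	MLX5_SET(tls_cmd, cmd, src_port, sport);
 *	MLX5_SET(tls_cmd, cmd, src_ipv4_src_ipv6.ipv4_layout.ipv4,
 *		 ntohl(saddr4));
 */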
src_ipv4_src_ipv6; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; + u8 tls_rcd_sn[0x40]; + u8 tcp_sn[0x20]; + u8 tls_implicit_iv[0x20]; + u8 tls_xor_iv[0x40]; + u8 encryption_key[0x100]; + u8 alg[4]; + u8 reserved2[0x1c]; + u8 reserved3[0x4a0]; +}; + +struct mlx5_ifc_tls_resp_bits { + u8 syndrome[0x20]; + u8 stream_id[0x20]; + u8 reserverd[0x40]; +}; + +#define MLX5_TLS_COMMAND_SIZE (0x100) + #endif /* MLX5_IFC_FPGA_H */ diff --git a/include/linux/net.h b/include/linux/net.h index 2248a052061d..6554d3ba4396 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -197,6 +197,7 @@ struct proto_ops { int offset, size_t size, int flags); int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, size_t size); + int (*set_rcvlowat)(struct sock *sk, int val); }; #define DECLARE_SOCKADDR(type, dst, src) \ diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index 29ed8fd6379a..db99240d00bd 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -103,11 +103,12 @@ enum { #define NET_DIM_PARAMS_NUM_PROFILES 5 /* Adaptive moderation profiles */ #define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 #define NET_DIM_DEF_PROFILE_CQE 1 #define NET_DIM_DEF_PROFILE_EQE 1 /* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */ -#define NET_DIM_EQE_PROFILES { \ +#define NET_DIM_RX_EQE_PROFILES { \ {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ @@ -115,7 +116,7 @@ enum { {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ } -#define NET_DIM_CQE_PROFILES { \ +#define NET_DIM_RX_CQE_PROFILES { \ {2, 256}, \ {8, 128}, \ {16, 64}, \ @@ -123,32 +124,68 @@ enum { {64, 64} \ } +#define NET_DIM_TX_EQE_PROFILES { \ + {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ +} + +#define NET_DIM_TX_CQE_PROFILES { \ + {5, 128}, \ + {8, 64}, \ + {16, 32}, \ + {32, 32}, \ + {64, 32} \ +} + static const struct net_dim_cq_moder -profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { - NET_DIM_EQE_PROFILES, - NET_DIM_CQE_PROFILES, +rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_RX_EQE_PROFILES, + NET_DIM_RX_CQE_PROFILES, }; -static inline struct net_dim_cq_moder net_dim_get_profile(u8 cq_period_mode, - int ix) +static const struct net_dim_cq_moder +tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_TX_EQE_PROFILES, + NET_DIM_TX_CQE_PROFILES, +}; + +static inline struct net_dim_cq_moder +net_dim_get_rx_moderation(u8 cq_period_mode, int ix) { - struct net_dim_cq_moder cq_moder; + struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; - cq_moder = profile[cq_period_mode][ix]; cq_moder.cq_period_mode = cq_period_mode; return cq_moder; } -static inline struct net_dim_cq_moder net_dim_get_def_profile(u8 rx_cq_period_mode) +static inline struct net_dim_cq_moder +net_dim_get_def_rx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 
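/* With the tables split into rx_profile[]/tx_profile[], a driver now picks
 * its default moderation per direction, e.g. at ring setup. The
 * set_cq_moderation() hook is illustrative; usec/pkts are the {usec, pkts}
 * pairs from the profile entries:
 *
 *	struct net_dim_cq_moder rx = net_dim_get_def_rx_moderation(mode);
 *	struct net_dim_cq_moder tx = net_dim_get_def_tx_moderation(mode);
 *
 *	set_cq_moderation(rx_cq, rx.usec, rx.pkts);
 *	set_cq_moderation(tx_cq, tx.usec, tx.pkts);
 */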
+ NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_rx_moderation(cq_period_mode, profile_ix); +} + +static inline struct net_dim_cq_moder +net_dim_get_tx_moderation(u8 cq_period_mode, int ix) { - int default_profile_ix; + struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} - if (rx_cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE) - default_profile_ix = NET_DIM_DEF_PROFILE_CQE; - else /* NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE */ - default_profile_ix = NET_DIM_DEF_PROFILE_EQE; +static inline struct net_dim_cq_moder +net_dim_get_def_tx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; - return net_dim_get_profile(rx_cq_period_mode, default_profile_ix); + return net_dim_get_tx_moderation(cq_period_mode, profile_ix); } static inline bool net_dim_on_top(struct net_dim *dim) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 35b79f47a13d..c87c3a3453c1 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -55,8 +55,9 @@ enum { NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */ NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */ + NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_UDP_BIT, + NETIF_F_GSO_UDP_L4_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ @@ -77,6 +78,7 @@ enum { NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload */ NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */ NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */ + NETIF_F_HW_TLS_TX_BIT, /* Hardware TLS TX offload */ NETIF_F_GRO_HW_BIT, /* Hardware Generic receive offload */ NETIF_F_HW_TLS_RECORD_BIT, /* Offload TLS record */ @@ -147,6 +149,8 @@ enum { #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM) #define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT) #define NETIF_F_HW_TLS_RECORD __NETIF_F(HW_TLS_RECORD) +#define NETIF_F_GSO_UDP_L4 __NETIF_F(GSO_UDP_L4) +#define NETIF_F_HW_TLS_TX __NETIF_F(HW_TLS_TX) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) @@ -216,6 +220,7 @@ enum { NETIF_F_GSO_GRE_CSUM | \ NETIF_F_GSO_IPXIP4 | \ NETIF_F_GSO_IPXIP6 | \ + NETIF_F_GSO_UDP_L4 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cf44503ea81a..46dcb5f7522f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -865,6 +865,26 @@ struct xfrmdev_ops { }; #endif +#if IS_ENABLED(CONFIG_TLS_DEVICE) +enum tls_offload_ctx_dir { + TLS_OFFLOAD_CTX_DIR_RX, + TLS_OFFLOAD_CTX_DIR_TX, +}; + +struct tls_crypto_info; +struct tls_context; + +struct tlsdev_ops { + int (*tls_dev_add)(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn); + void (*tls_dev_del)(struct net_device *netdev, + struct tls_context *ctx, + enum tls_offload_ctx_dir direction); +}; +#endif + struct dev_ifalias { struct rcu_head rcuhead; char ifalias[]; @@ -1165,7 +1185,7 @@ struct dev_ifalias { * This function is used to set or query state related to XDP on the * netdevice and manage BPF offload. 
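 * The xdp_buff -> xdp_frame switch in ndo_xdp_xmit (below) shows up in
 * consumers as stored state instead of pointer arithmetic; compare the
 * tun/vhost peek path earlier in this series:
 *
 *	struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
 *
 *	return xdpf->len;	(was: xdp->data_end - xdp->data)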
See definition of * enum bpf_netdev_command for details. - * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); + * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp); * This function is used to submit a XDP packet for transmit on a * netdevice. * void (*ndo_xdp_flush)(struct net_device *dev); @@ -1356,7 +1376,7 @@ struct net_device_ops { int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); int (*ndo_xdp_xmit)(struct net_device *dev, - struct xdp_buff *xdp); + struct xdp_frame *xdp); void (*ndo_xdp_flush)(struct net_device *dev); }; @@ -1750,6 +1770,10 @@ struct net_device { const struct xfrmdev_ops *xfrmdev_ops; #endif +#if IS_ENABLED(CONFIG_TLS_DEVICE) + const struct tlsdev_ops *tlsdev_ops; +#endif + const struct header_ops *header_ops; unsigned int flags; @@ -3213,19 +3237,6 @@ static inline int netif_set_xps_queue(struct net_device *dev, } #endif -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues); - -/* - * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used - * as a distribution range limit for the returned value. - */ -static inline u16 skb_tx_hash(const struct net_device *dev, - struct sk_buff *skb) -{ - return __skb_tx_hash(dev, skb, dev->real_num_tx_queues); -} - /** * netif_is_multiqueue - test if device has multiple transmit queues * @dev: network device @@ -4186,6 +4197,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/phy.h b/include/linux/phy.h index f0b5870a6d40..073235e70442 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1068,6 +1068,52 @@ int __init mdio_bus_init(void); void mdio_bus_exit(void); #endif +/* Inline function for use within net/core/ethtool.c (built-in) */ +static inline int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data) +{ + if (!phydev->drv) + return -EIO; + + mutex_lock(&phydev->lock); + phydev->drv->get_strings(phydev, data); + mutex_unlock(&phydev->lock); + + return 0; +} + +static inline int phy_ethtool_get_sset_count(struct phy_device *phydev) +{ + int ret; + + if (!phydev->drv) + return -EIO; + + if (phydev->drv->get_sset_count && + phydev->drv->get_strings && + phydev->drv->get_stats) { + mutex_lock(&phydev->lock); + ret = phydev->drv->get_sset_count(phydev); + mutex_unlock(&phydev->lock); + + return ret; + } + + return -EOPNOTSUPP; +} + +static inline int phy_ethtool_get_stats(struct phy_device *phydev, + struct ethtool_stats *stats, u64 *data) +{ + if (!phydev->drv) + return -EIO; + + mutex_lock(&phydev->lock); + phydev->drv->get_stats(phydev, stats, data); + mutex_unlock(&phydev->lock); + + return 0; +} + extern struct bus_type mdio_bus_type; struct mdio_board_info { diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h deleted file mode 100644 index 11f00cdabe3d..000000000000 --- a/include/linux/platform_data/mdio-gpio.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * MDIO-GPIO bus platform data structures - * - * Copyright (C) 2008, Paulius Zaleckas <paulius.zaleckas@teltonika.lt> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. 
This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#ifndef __LINUX_MDIO_GPIO_H -#define __LINUX_MDIO_GPIO_H - -#include <linux/mdio-bitbang.h> - -struct mdio_gpio_platform_data { - /* GPIO numbers for bus pins */ - unsigned int mdc; - unsigned int mdio; - unsigned int mdo; - - bool mdc_active_low; - bool mdio_active_low; - bool mdo_active_low; - - u32 phy_mask; - u32 phy_ignore_ta_mask; - int irqs[PHY_MAX_ADDR]; - /* reset callback */ - int (*reset)(struct mii_bus *bus); -}; - -#endif /* __LINUX_MDIO_GPIO_H */ diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 147d08ccf813..7f9756fe9180 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -352,6 +352,7 @@ struct qed_eth_ops { int (*configure_arfs_searcher)(struct qed_dev *cdev, enum qed_filter_config_mode mode); int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle); + int (*req_bulletin_update_mac)(struct qed_dev *cdev, u8 *mac); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b5b2bc9eacca..e53f9c7c2809 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -159,6 +159,9 @@ struct qed_dcbx_get { enum qed_nvm_images { QED_NVM_IMAGE_ISCSI_CFG, QED_NVM_IMAGE_FCOE_CFG, + QED_NVM_IMAGE_NVM_CFG1, + QED_NVM_IMAGE_DEFAULT_CFG, + QED_NVM_IMAGE_NVM_META, }; struct qed_link_eee_params { diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 1f8ad121eb43..4e1f535c2034 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -836,9 +836,8 @@ out: * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ static inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, @@ -866,9 +865,8 @@ static inline int rhashtable_insert_fast( * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ static inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, @@ -890,9 +888,8 @@ static inline int rhltable_insert_key( * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ static inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, @@ -922,9 +919,8 @@ static inline int rhltable_insert( * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. 
*/ static inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, @@ -981,9 +977,8 @@ static inline void *rhashtable_lookup_get_insert_fast( * * Lookups may occur in parallel with hashtable mutations and resizing. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. * * Returns zero on success. */ @@ -1134,8 +1129,8 @@ static inline int __rhashtable_remove_fast( * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. + * Will automatically shrink the table if permitted when residency drops + * below 30%. * * Returns zero on success, -ENOENT if the entry could not be found. */ @@ -1156,8 +1151,8 @@ static inline int rhashtable_remove_fast( * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. + * Will automatically shrink the table if permitted when residency drops + * below 30% * * Returns zero on success, -ENOENT if the entry could not be found. */ @@ -1273,8 +1268,9 @@ static inline int rhashtable_walk_init(struct rhashtable *ht, * For a completely stable walk you should construct your own data * structure outside the hash table. * - * This function may sleep so you must not call it from interrupt - * context or with spin locks held. + * This function may be called from any process context, including + * non-preemptable context, but cannot be called from softirq or + * hardirq context. * * You must call rhashtable_walk_exit after this function returns. 
*/ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9065477ed255..908d66e55b14 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -573,6 +573,8 @@ enum { SKB_GSO_ESP = 1 << 15, SKB_GSO_UDP = 1 << 16, + + SKB_GSO_UDP_L4 = 1 << 17, }; #if BITS_PER_LONG > 32 @@ -1032,6 +1034,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); +void skb_copy_header(struct sk_buff *new, const struct sk_buff *old); struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, gfp_t gfp_mask, bool fclone); @@ -3131,6 +3134,7 @@ static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len) return skb->data; } +int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim @@ -3144,9 +3148,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; - return __pskb_trim(skb, len); + return pskb_trim_rcsum_slow(skb, len); } static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h index 66693bc4c6ad..7127ec301537 100644 --- a/include/linux/soc/ti/knav_dma.h +++ b/include/linux/soc/ti/knav_dma.h @@ -167,6 +167,8 @@ struct knav_dma_desc { void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config); void knav_dma_close_channel(void *channel); +int knav_dma_get_flow(void *channel); +bool knav_dma_device_ready(void); #else static inline void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config) @@ -176,6 +178,16 @@ static inline void *knav_dma_open_channel(struct device *dev, const char *name, static inline void knav_dma_close_channel(void *channel) {} +static inline int knav_dma_get_flow(void *channel) +{ + return -EINVAL; +} + +static inline bool knav_dma_device_ready(void) +{ + return false; +} + #endif #endif /* __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ */ diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h index 9f0ebb3bad27..9745df6ed9d3 100644 --- a/include/linux/soc/ti/knav_qmss.h +++ b/include/linux/soc/ti/knav_qmss.h @@ -86,5 +86,6 @@ int knav_pool_desc_map(void *ph, void *desc, unsigned size, void *knav_pool_desc_unmap(void *ph, dma_addr_t dma, unsigned dma_sz); dma_addr_t knav_pool_desc_virt_to_dma(void *ph, void *virt); void *knav_pool_desc_dma_to_virt(void *ph, dma_addr_t dma); +bool knav_qmss_device_ready(void); #endif /* __SOC_TI_KNAV_QMSS_H__ */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8f4c54986f97..807776928cb8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -228,7 +228,7 @@ struct tcp_sock { unused:2; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ - unused1 : 1, + recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ repair : 1, frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; @@ -281,6 +281,7 @@ struct tcp_sock { * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. 
rexmits */ + u32 delivered_ce; /* Like the above but only ECE marked packets */ u32 lost; /* Total data packets lost incl. rexmits */ u32 app_limited; /* limited until "delivered" reaches this val */ u64 first_tx_mstamp; /* start of window send phase */ diff --git a/include/linux/udp.h b/include/linux/udp.h index eaea63bc79bb..ca840345571b 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -55,6 +55,7 @@ struct udp_sock { * when the socket is uncorked. */ __u16 len; /* total length of pending frames */ + __u16 gso_size; /* * Fields specific to UDP-Lite. */ @@ -87,6 +88,8 @@ struct udp_sock { int forward_deficit; }; +#define UDP_MAX_SEGMENTS (1 << 6UL) + static inline struct udp_sock *udp_sk(const struct sock *sk) { return (struct udp_sock *)sk; } diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 378d601258be..8312cc25a3af 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -308,6 +308,20 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) } /** + * __in6_dev_get_safely - get inet6_dev pointer from netdevice + * @dev: network device + * + * This is a safer version of __in6_dev_get + */ +static inline struct inet6_dev *__in6_dev_get_safely(const struct net_device *dev) +{ + if (likely(dev)) + return rcu_dereference_rtnl(dev->ip6_ptr); + else + return NULL; +} + +/** * in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device * diff --git a/include/net/ax88796.h b/include/net/ax88796.h index b9a3beca0ce4..84b3785d0e66 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -12,6 +12,10 @@ #ifndef __NET_AX88796_PLAT_H #define __NET_AX88796_PLAT_H +struct sk_buff; +struct net_device; +struct platform_device; + #define AXFLG_HAS_EEPROM (1<<0) #define AXFLG_MAC_FROMDEV (1<<1) /* device already has MAC */ #define AXFLG_HAS_93CX6 (1<<2) /* use eeprom_93cx6 driver */ @@ -26,6 +30,16 @@ struct ax_plat_data { u32 *reg_offsets; /* register offsets */ u8 *mac_addr; /* MAC addr (only used when AXFLG_MAC_FROMPLATFORM is used) */ + + /* uses default ax88796 buffer if set to NULL */ + void (*block_output)(struct net_device *dev, int count, + const unsigned char *buf, int start_page); + void (*block_input)(struct net_device *dev, int count, + struct sk_buff *skb, int ring_offset); + /* returns nonzero if a pending interrupt request might be caused by + * the ax88796. Handles all interrupts if set to NULL + */ + int (*check_irq)(struct platform_device *pdev); }; #endif /* __NET_AX88796_PLAT_H */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 60fb4ec8ba61..462e9741b210 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -356,10 +356,13 @@ struct dsa_switch_ops { /* * ethtool hardware statistics.
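Illustrative only: with the reworked callbacks in the hunk just below, get_sset_count() learns which string set is being queried and get_strings() is keyed by it. A hypothetical switch driver might implement the pair as follows; FOO_NUM_STATS and foo_stat_names are invented names:

static int foo_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
	if (sset != ETH_SS_STATS)
		return 0;

	return FOO_NUM_STATS;
}

static void foo_get_strings(struct dsa_switch *ds, int port,
			    u32 stringset, uint8_t *data)
{
	int i;

	if (stringset != ETH_SS_STATS)
		return;

	for (i = 0; i < FOO_NUM_STATS; i++)
		memcpy(data + i * ETH_GSTRING_LEN, foo_stat_names[i],
		       ETH_GSTRING_LEN);
}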
*/ - void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data); + void (*get_strings)(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data); void (*get_ethtool_stats)(struct dsa_switch *ds, int port, uint64_t *data); - int (*get_sset_count)(struct dsa_switch *ds, int port); + int (*get_sset_count)(struct dsa_switch *ds, int port, int sset); + void (*get_ethtool_phy_stats)(struct dsa_switch *ds, + int port, uint64_t *data); /* * ethtool Wake-on-LAN @@ -588,4 +591,9 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, #define BRCM_TAG_GET_PORT(v) ((v) >> 8) #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) + +int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data); +int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data); +int dsa_port_get_phy_sset_count(struct dsa_port *dp); + #endif diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index e5cfcfc7dd93..b473df5b9512 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -75,7 +75,8 @@ struct fib_rules_ops { int (*configure)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *, - struct nlattr **); + struct nlattr **, + struct netlink_ext_ack *); int (*delete)(struct fib_rule *); int (*compare)(struct fib_rule *, struct fib_rule_hdr *, diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d4088d1a688d..db389253dc2a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,7 +64,7 @@ struct inet6_ifaddr { struct delayed_work dad_work; struct inet6_dev *idev; - struct rt6_info *rt; + struct fib6_info *rt; struct hlist_node addr_lst; struct list_head if_list; @@ -143,8 +143,7 @@ struct ipv6_ac_socklist { struct ifacaddr6 { struct in6_addr aca_addr; - struct inet6_dev *aca_idev; - struct rt6_info *aca_rt; + struct fib6_info *aca_rt; struct ifacaddr6 *aca_next; int aca_users; refcount_t aca_refcnt; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b68fea022a82..2ab6667275df 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops { * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data + * @icsk_clean_acked Clean acked data hook * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts @@ -102,6 +103,7 @@ struct inet_connection_sock { const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; + void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq); struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 0a671c32d6b9..83d5b3c2ac42 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -147,6 +147,7 @@ struct inet_cork { __u8 ttl; __s16 tos; char priority; + __u16 gso_size; }; struct inet_cork_full { diff --git a/include/net/ip.h b/include/net/ip.h index ecffd843e7b8..bada1f1f871e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -76,6 +76,7 @@ struct ipcm_cookie { __u8 ttl; __s16 tos; char priority; + __u16 gso_size; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) @@ -171,7 +172,7 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4, int len, int odd, struct sk_buff 
*skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, - unsigned int flags); + struct inet_cork *cork, unsigned int flags); static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4) { @@ -396,6 +397,9 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } +int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, + u32 *metrics); + u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 5e86fd9dc857..1af450d4e923 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -38,6 +38,7 @@ #endif struct rt6_info; +struct fib6_info; struct fib6_config { u32 fc_table; @@ -74,12 +75,12 @@ struct fib6_node { #ifdef CONFIG_IPV6_SUBTREES struct fib6_node __rcu *subtree; #endif - struct rt6_info __rcu *leaf; + struct fib6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; - struct rt6_info __rcu *rr_ptr; + struct fib6_info __rcu *rr_ptr; struct rcu_head rcu; }; @@ -94,11 +95,6 @@ struct fib6_gc_args { #define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif -struct mx6_config { - const u32 *mx; - DECLARE_BITMAP(mx_valid, RTAX_MAX); -}; - /* * routing information * @@ -127,56 +123,71 @@ struct rt6_exception { #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) #define FIB6_MAX_DEPTH 5 -struct rt6_info { - struct dst_entry dst; - struct rt6_info __rcu *rt6_next; - struct rt6_info *from; +struct fib6_nh { + struct in6_addr nh_gw; + struct net_device *nh_dev; + struct lwtunnel_state *nh_lwtstate; - /* - * Tail elements of dst_entry (__refcnt etc.) - * and these elements (rarely used in hot path) are in - * the same cache line. - */ - struct fib6_table *rt6i_table; - struct fib6_node __rcu *rt6i_node; + unsigned int nh_flags; + atomic_t nh_upper_bound; + int nh_weight; +}; - struct in6_addr rt6i_gateway; +struct fib6_info { + struct fib6_table *fib6_table; + struct fib6_info __rcu *rt6_next; + struct fib6_node __rcu *fib6_node; /* Multipath routes: - * siblings is a list of rt6_info that have the the same metric/weight, + * siblings is a list of fib6_info that have the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ - struct list_head rt6i_siblings; - unsigned int rt6i_nsiblings; - atomic_t rt6i_nh_upper_bound; + struct list_head fib6_siblings; + unsigned int fib6_nsiblings; - atomic_t rt6i_ref; + atomic_t fib6_ref; + unsigned long expires; + struct dst_metrics *fib6_metrics; +#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] - unsigned int rt6i_nh_flags; + struct rt6key fib6_dst; + u32 fib6_flags; + struct rt6key fib6_src; + struct rt6key fib6_prefsrc; - /* These are in a separate cache line.
*/ - struct rt6key rt6i_dst ____cacheline_aligned_in_smp; - u32 rt6i_flags; + struct rt6_info * __percpu *rt6i_pcpu; + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; + + u32 fib6_metric; + u8 fib6_protocol; + u8 fib6_type; + u8 exception_bucket_flushed:1, + should_flush:1, + dst_nocount:1, + dst_nopolicy:1, + dst_host:1, + unused:3; + + struct fib6_nh fib6_nh; +}; + +struct rt6_info { + struct dst_entry dst; + struct fib6_info __rcu *from; + + struct rt6key rt6i_dst; struct rt6key rt6i_src; + struct in6_addr rt6i_gateway; + struct inet6_dev *rt6i_idev; + u32 rt6i_flags; struct rt6key rt6i_prefsrc; struct list_head rt6i_uncached; struct uncached_list *rt6i_uncached_list; - struct inet6_dev *rt6i_idev; - struct rt6_info * __percpu *rt6i_pcpu; - struct rt6_exception_bucket __rcu *rt6i_exception_bucket; - - u32 rt6i_metric; - u32 rt6i_pmtu; /* more non-fragment space at head required */ - int rt6i_nh_weight; unsigned short rt6i_nfheader_len; - u8 rt6i_protocol; - u8 exception_bucket_flushed:1, - should_flush:1, - unused:6; }; #define for_each_fib6_node_rt_rcu(fn) \ @@ -192,27 +203,24 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) return ((struct rt6_info *)dst)->rt6i_idev; } -static inline void rt6_clean_expires(struct rt6_info *rt) +static inline void fib6_clean_expires(struct fib6_info *f6i) { - rt->rt6i_flags &= ~RTF_EXPIRES; - rt->dst.expires = 0; + f6i->fib6_flags &= ~RTF_EXPIRES; + f6i->expires = 0; } -static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires) +static inline void fib6_set_expires(struct fib6_info *f6i, + unsigned long expires) { - rt->dst.expires = expires; - rt->rt6i_flags |= RTF_EXPIRES; + f6i->expires = expires; + f6i->fib6_flags |= RTF_EXPIRES; } -static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) +static inline bool fib6_check_expired(const struct fib6_info *f6i) { - struct rt6_info *rt; - - for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from); - if (rt && rt != rt0) - rt0->dst.expires = rt->dst.expires; - dst_set_expires(&rt0->dst, timeout); - rt0->rt6i_flags |= RTF_EXPIRES; + if (f6i->fib6_flags & RTF_EXPIRES) + return time_after(jiffies, f6i->expires); + return false; } /* Function to safely get fn->sernum for passed in rt @@ -220,14 +228,13 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) * Return true if we can get cookie safely * Return false if not */ -static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, - u32 *cookie) +static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, + u32 *cookie) { struct fib6_node *fn; bool status = false; - rcu_read_lock(); - fn = rcu_dereference(rt->rt6i_node); + fn = rcu_dereference(f6i->fib6_node); if (fn) { *cookie = fn->fn_sernum; @@ -236,19 +243,22 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, status = true; } - rcu_read_unlock(); return status; } static inline u32 rt6_get_cookie(const struct rt6_info *rt) { + struct fib6_info *from; u32 cookie = 0; - if (rt->rt6i_flags & RTF_PCPU || - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) - rt = rt->from; + rcu_read_lock(); + + from = rcu_dereference(rt->from); + if (from && (rt->rt6i_flags & RTF_PCPU || + unlikely(!list_empty(&rt->rt6i_uncached)))) + fib6_get_cookie_safe(from, &cookie); - rt6_get_cookie_safe(rt, &cookie); + rcu_read_unlock(); return cookie; } @@ -262,20 +272,18 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } -void rt6_free_pcpu(struct rt6_info *non_pcpu_rt); 
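A hedged sketch of the intended lifecycle for the replacement refcounting added just below (fib6_info_hold()/fib6_info_release()); grab_route() is an invented name, not part of this patch:

static struct fib6_info *grab_route(struct fib6_node *fn)
{
	struct fib6_info *f6i;

	rcu_read_lock();
	f6i = rcu_dereference(fn->leaf);
	if (f6i)
		fib6_info_hold(f6i);	/* atomic_inc(&f6i->fib6_ref) */
	rcu_read_unlock();

	return f6i;	/* caller drops it with fib6_info_release() */
}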
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags); +void fib6_info_destroy(struct fib6_info *f6i); -static inline void rt6_hold(struct rt6_info *rt) +static inline void fib6_info_hold(struct fib6_info *f6i) { - atomic_inc(&rt->rt6i_ref); + atomic_inc(&f6i->fib6_ref); } -static inline void rt6_release(struct rt6_info *rt) +static inline void fib6_info_release(struct fib6_info *f6i) { - if (atomic_dec_and_test(&rt->rt6i_ref)) { - rt6_free_pcpu(rt); - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } + if (f6i && atomic_dec_and_test(&f6i->fib6_ref)) + fib6_info_destroy(f6i); } enum fib6_walk_state { @@ -291,7 +299,7 @@ enum fib6_walk_state { struct fib6_walker { struct list_head lh; struct fib6_node *root, *node; - struct rt6_info *leaf; + struct fib6_info *leaf; enum fib6_walk_state state; unsigned int skip; unsigned int count; @@ -355,7 +363,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ - struct rt6_info *rt; + struct fib6_info *rt; }; /* @@ -377,15 +385,19 @@ struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *saddr, int src_len, bool exact_match); -void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), +void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, - struct netlink_ext_ack *extack); -int fib6_del(struct rt6_info *rt, struct nl_info *info); +int fib6_add(struct fib6_node *root, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack); +int fib6_del(struct fib6_info *rt, struct nl_info *info); + +static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) +{ + return f6i->fib6_nh.nh_dev; +} -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, +void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); @@ -408,8 +420,14 @@ void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb); -void fib6_update_sernum(struct rt6_info *rt); -void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt); +void fib6_update_sernum(struct net *net, struct fib6_info *rt); +void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); + +void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); +static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) +{ + return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); +} #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 08b132381984..8df4ff798b04 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -66,9 +66,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } -static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) +static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { - return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == RTF_GATEWAY; } @@ -100,29 +100,29 @@ void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int 
cmd, void __user *arg); -int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); -int ip6_ins_rt(struct rt6_info *); -int ip6_del_rt(struct rt6_info *); +int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +int ip6_ins_rt(struct net *net, struct fib6_info *f6i); +int ip6_del_rt(struct net *net, struct fib6_info *f6i); -void rt6_flush_exceptions(struct rt6_info *rt); -int rt6_remove_exception_rt(struct rt6_info *rt); -void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, +void rt6_flush_exceptions(struct fib6_info *f6i); +void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, unsigned long now); -static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, +static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { - struct inet6_dev *idev = - rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; int err = 0; - if (rt && rt->rt6i_prefsrc.plen) - *saddr = rt->rt6i_prefsrc.addr; - else - err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, - daddr, prefs, saddr); + if (f6i && f6i->fib6_prefsrc.plen) { + *saddr = f6i->fib6_prefsrc.addr; + } else { + struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + + err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); + } return err; } @@ -137,8 +137,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); void fib6_force_start_gc(struct net *net); -struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - const struct in6_addr *addr, bool anycast); +struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev, + const struct in6_addr *addr, bool anycast, + gfp_t gfp_flags); struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags); @@ -147,9 +148,11 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, * support functions for ND * */ -struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, +struct fib6_info *rt6_get_dflt_router(struct net *net, + const struct in6_addr *addr, struct net_device *dev); -struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, +struct fib6_info *rt6_add_dflt_router(struct net *net, + const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); void rt6_purge_dflt_routers(struct net *net); @@ -174,14 +177,14 @@ struct rt6_rtnl_dump_arg { struct net *net; }; -int rt6_dump_route(struct rt6_info *rt, void *p_arg); +int rt6_dump_route(struct fib6_info *f6i, void *p_arg); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); void rt6_sync_up(struct net_device *dev, unsigned int nh_flags); void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); -void rt6_multipath_rebalance(struct rt6_info *rt); +void rt6_multipath_rebalance(struct fib6_info *f6i); void rt6_uncached_list_add(struct rt6_info *rt); void rt6_uncached_list_del(struct rt6_info *rt); @@ -269,12 +272,14 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, return daddr; } -static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) +static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - return a->dst.dev == b->dst.dev && - a->rt6i_idev == b->rt6i_idev && - 
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && - !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); + return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev && + ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) && + !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); } +struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, + struct net_device *dev, struct sk_buff *skb, + const void *daddr); #endif diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 540a4b4417bf..751646adc769 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -379,6 +379,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, return 0; } +static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, + const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return iph->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ((const struct ipv6hdr *)iph)->hop_limit; + else + return 0; +} + /* Propogate ECN bits out */ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, const struct sk_buff *skb) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 836f31af1369..0a872a7c33c8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -298,6 +298,7 @@ struct ipcm6_cookie { __s16 tclass; __s8 dontfrag; struct ipv6_txoptions *opt; + __u16 gso_size; }; static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) @@ -950,6 +951,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, void *from, int length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, + struct inet_cork_full *cork, const struct sockcm_cookie *sockc); static inline struct sk_buff *ip6_finish_skb(struct sock *sk) @@ -1044,8 +1046,6 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); int inet6_release(struct socket *sock); -int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e421f86af043..6c1eecd56a4d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -246,6 +246,7 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 +#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 @@ -526,5 +527,21 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, } while (read_seqretry(&n->ha_lock, seq)); } - +static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, + int *notify) +{ + u8 ndm_flags = 0; + + if (!(flags & NEIGH_UPDATE_F_ADMIN)) + return; + + ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? 
NTF_EXT_LEARNED : 0; + if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { + if (ndm_flags & NTF_EXT_LEARNED) + neigh->flags |= NTF_EXT_LEARNED; + else + neigh->flags &= ~NTF_EXT_LEARNED; + *notify = 1; + } +} #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index c29f09cfc9d7..c978a31b0f84 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -43,6 +43,7 @@ struct netns_sysctl_ipv6 { int max_hbh_opts_cnt; int max_dst_opts_len; int max_hbh_opts_len; + int seg6_flowlabel; }; struct netns_ipv6 { @@ -60,7 +61,8 @@ struct netns_ipv6 { #endif struct xt_table *ip6table_nat; #endif - struct rt6_info *ip6_null_entry; + struct fib6_info *fib6_null_entry; + struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; struct timer_list ip6_fib_timer; struct hlist_head *fib_table_hash; diff --git a/include/net/page_pool.h b/include/net/page_pool.h new file mode 100644 index 000000000000..c79087153148 --- /dev/null +++ b/include/net/page_pool.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * page_pool.h + * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> + * Copyright (C) 2016 Red Hat, Inc. + */ + +/** + * DOC: page_pool allocator + * + * This page_pool allocator is optimized for the XDP mode that + * uses one-frame-per-page, but has fallbacks that act like the + * regular page allocator APIs. + * + * Basic use involves replacing alloc_pages() calls with the + * page_pool_alloc_pages() call. Drivers should likely use + * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). + * + * If page_pool handles DMA mapping (using page->private), then the API + * user is responsible for invoking page_pool_put_page() once. In case of + * elevated refcnt, the DMA state is released, assuming other users of + * the page will eventually call put_page(). + * + * If no DMA mapping is done, then it can act as a shim layer that + * falls through to alloc_pages(). As no state is kept on the page, the + * regular put_page() call is sufficient. + */ +#ifndef _NET_PAGE_POOL_H +#define _NET_PAGE_POOL_H + +#include <linux/mm.h> /* Needed by ptr_ring */ +#include <linux/ptr_ring.h> +#include <linux/dma-direction.h> + +#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */ +#define PP_FLAG_ALL PP_FLAG_DMA_MAP + +/* + * Fast allocation side cache array/stack + * + * The cache size and refill watermark are related to the network + * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX + * ring is usually refilled and the max consumed elements will be 64, + * thus a natural max size of objects needed in the cache. + * + * Keeping room for more objects is due to the XDP_DROP use-case. + * XDP_DROP allows the opportunity to recycle objects directly into + * this array, as it shares the same softirq/NAPI protection. If the + * cache is already full (or partly full) then the XDP_DROP recycles + * would have to take a slower code path.
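To make the contract above concrete, a hypothetical driver-side sketch using the API this header introduces: one pool per RX queue, allocation from NAPI context, direct recycle on XDP_DROP. rxq_create_pool() and rxq_handle_drop() are invented names, and error handling is trimmed:

static struct page_pool *rxq_create_pool(struct device *dev, int nid)
{
	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP,	/* pool does DMA map/unmap */
		.order		= 0,			/* one frame per page */
		.pool_size	= 256,
		.nid		= nid,
		.dev		= dev,
		.dma_dir	= DMA_FROM_DEVICE,
	};

	return page_pool_create(&pp_params);	/* ERR_PTR() on failure */
}

static void rxq_handle_drop(struct page_pool *pool, struct page *page)
{
	/* Safe only from the pool's own NAPI/softirq context. */
	page_pool_recycle_direct(pool, page);
}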
+ */ +#define PP_ALLOC_CACHE_SIZE 128 +#define PP_ALLOC_CACHE_REFILL 64 +struct pp_alloc_cache { + u32 count; + void *cache[PP_ALLOC_CACHE_SIZE]; +}; + +struct page_pool_params { + unsigned int flags; + unsigned int order; + unsigned int pool_size; + int nid; /* NUMA node id to allocate pages from */ + struct device *dev; /* device, for DMA pre-mapping purposes */ + enum dma_data_direction dma_dir; /* DMA mapping direction */ +}; + +struct page_pool { + struct rcu_head rcu; + struct page_pool_params p; + + /* + * Data structure for allocation side + * + * The driver's allocation side usually already performs some kind + * of resource protection. Piggyback on this protection, and + * require the driver to protect the allocation side. + * + * For NIC drivers this means allocating a page_pool per + * RX-queue, as the RX-queue is already protected by + * softirq/BH scheduling and napi_schedule; NAPI scheduling + * guarantees that a single napi_struct will only be scheduled + * on a single CPU (see napi_schedule). + */ + struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; + + /* Data structure for storing recycled pages. + * + * Returning/freeing pages is more complicated synchronization + * wise, because frees can happen on remote CPUs, with no + * association with the allocation resource. + * + * Use ptr_ring, as it separates consumer and producer + * efficiently, in a way that doesn't bounce cache-lines. + * + * TODO: Implement bulk return of pages into this structure. + */ + struct ptr_ring ring; +}; + +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); + +static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_pages(pool, gfp); +} + +struct page_pool *page_pool_create(const struct page_pool_params *params); + +void page_pool_destroy(struct page_pool *pool); + +/* Never call this directly, use helpers below */ +void __page_pool_put_page(struct page_pool *pool, + struct page *page, bool allow_direct); + +static inline void page_pool_put_page(struct page_pool *pool, struct page *page) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield the linker. + */ +#ifdef CONFIG_PAGE_POOL + __page_pool_put_page(pool, page, false); +#endif +} +/* Very limited use-cases allow direct recycling */ +static inline void page_pool_recycle_direct(struct page_pool *pool, + struct page *page) +{ + __page_pool_put_page(pool, page, true); +} + +static inline bool is_page_pool_compiled_in(void) +{ +#ifdef CONFIG_PAGE_POOL + return true; +#else + return false; +#endif +} + +#endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 20ff237c5eb2..86f034b524d4 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -254,11 +254,10 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 }; #define SCTP_TSN_MAP_SIZE 4096 /* We will not record more than this many duplicate TSNs between two - * SACKs. The minimum PMTU is 576. Remove all the headers and there - * is enough room for 131 duplicate reports. Round down to the + * SACKs. The minimum PMTU is 512. Remove all the headers and there + * is enough room for 117 duplicate reports. Round down to the + * nearest power of 2.
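For the PMTU helpers introduced a little further below (sctp_mtu_payload() and sctp_dst_mtu()), a hedged worked example of the overhead arithmetic, assuming IPv4 and a 16-byte extra for the DATA chunk header:

/* overhead = sizeof(struct sctphdr) + extra  = 12 + 16 = 28
 *          + net_header_len (IPv4 header)    = 28 + 20 = 48
 *
 * sctp_mtu_payload(sp, 1500, 16) -> 1500 - 48 = 1452
 * sctp_mtu_payload(sp, 0, 16)    -> 48 (the total overhead itself)
 */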
*/ -enum { SCTP_MIN_PMTU = 576 }; enum { SCTP_MAX_DUP_TSNS = 16 }; enum { SCTP_MAX_GABS = 16 }; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 28b996d63490..f66d44350007 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -428,32 +428,6 @@ static inline int sctp_list_single_entry(struct list_head *head) return (head->next != head) && (head->next == head->prev); } -/* Break down data chunks at this point. */ -static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) -{ - struct sctp_sock *sp = sctp_sk(asoc->base.sk); - struct sctp_af *af = sp->pf->af; - int frag = pmtu; - - frag -= af->ip_options_len(asoc->base.sk); - frag -= af->net_header_len; - frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream); - - if (asoc->user_frag) - frag = min_t(int, frag, asoc->user_frag); - - frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - - sctp_datachk_len(&asoc->stream))); - - return frag; -} - -static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc) -{ - sctp_assoc_sync_pmtu(asoc); - asoc->pmtu_pending = 0; -} - static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk) { return !list_empty(&chunk->list); @@ -607,17 +581,29 @@ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport * return t->dst; } -static inline bool sctp_transport_pmtu_check(struct sctp_transport *t) +/* Calculate max payload size given a MTU, or the total overhead if + * given MTU is zero + */ +static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp, + __u32 mtu, __u32 extra) { - __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)), - SCTP_DEFAULT_MINSEGMENT); + __u32 overhead = sizeof(struct sctphdr) + extra; - if (t->pathmtu == pmtu) - return true; + if (sp) + overhead += sp->pf->af->net_header_len; + else + overhead += sizeof(struct ipv6hdr); - t->pathmtu = pmtu; + if (WARN_ON_ONCE(mtu && mtu <= overhead)) + mtu = overhead; - return false; + return mtu ? 
mtu - overhead : overhead; +} + +static inline __u32 sctp_dst_mtu(const struct dst_entry *dst) +{ + return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst), + SCTP_DEFAULT_MINSEGMENT)); } #endif /* __net_sctp_h__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2d0e782c9055..5ef1bad81ef5 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -207,7 +207,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, int len, __u8 flags, gfp_t gfp); struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, const __u32 lowest_tsn); -struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc); +struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc); struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, @@ -215,7 +215,7 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, struct sctp_chunk *sctp_make_shutdown_complete( const struct sctp_association *asoc, const struct sctp_chunk *chunk); -void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen); +int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen); struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc, const struct sctp_chunk *chunk, const size_t hint); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a0ec462bc1a9..ebf809eed33a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -2091,16 +2091,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, enum sctp_transport_cmd command, sctp_sn_error_t error); struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32); -struct sctp_transport *sctp_assoc_is_match(struct sctp_association *, - struct net *, - const union sctp_addr *, - const union sctp_addr *); void sctp_assoc_migrate(struct sctp_association *, struct sock *); int sctp_assoc_update(struct sctp_association *old, struct sctp_association *new); __u32 sctp_association_get_next_tsn(struct sctp_association *); +void sctp_assoc_update_frag_point(struct sctp_association *asoc); +void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu); void sctp_assoc_sync_pmtu(struct sctp_association *asoc); void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); diff --git a/include/net/sock.h b/include/net/sock.h index 74d725fdbe0f..3c568b36ee36 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -481,6 +481,11 @@ struct sock { void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb); +#endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; struct rcu_head sk_rcu; @@ -2332,6 +2337,22 @@ static inline bool sk_fullsock(const struct sock *sk) return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV); } +/* Checks if this SKB belongs to an HW offloaded socket + * and whether any SW fallbacks are required based on dev. 
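A hypothetical call site for the helper defined next: a transmit path asks the socket layer for any SW fallback before handing the skb to the device. foo_start_xmit() and foo_hw_queue_xmit() are invented names:

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	skb = sk_validate_xmit_skb(skb, dev);
	if (unlikely(!skb))
		return NETDEV_TX_OK;	/* consumed/dropped by the fallback */

	return foo_hw_queue_xmit(skb, dev);
}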
+ */ +static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, + struct net_device *dev) +{ +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sock *sk = skb->sk; + + if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) + skb = sk->sk_validate_xmit_skb(sk, dev, skb); +#endif + + return skb; +} + /* This helper checks if a socket is a LISTEN or NEW_SYN_RECV * SYNACK messages can be attached to either ones (depending on SYNCOOKIE) */ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 39bc855d7fee..d574ce63bf22 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -155,6 +155,7 @@ struct switchdev_notifier_fdb_info { struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; + bool added_by_user; }; static inline struct net_device * diff --git a/include/net/tcp.h b/include/net/tcp.h index 9c9b3768b350..cf803fe0fb86 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -402,6 +402,10 @@ void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); +int tcp_set_rcvlowat(struct sock *sk, int val); +void tcp_data_ready(struct sock *sk); +int tcp_mmap(struct file *file, struct socket *sock, + struct vm_area_struct *vma); void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); @@ -2101,4 +2105,12 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) #if IS_ENABLED(CONFIG_SMC) extern struct static_key_false tcp_have_smc; #endif + +#if IS_ENABLED(CONFIG_TLS_DEVICE) +void clean_acked_data_enable(struct inet_connection_sock *icsk, + void (*cad)(struct sock *sk, u32 ack_seq)); +void clean_acked_data_disable(struct inet_connection_sock *icsk); + +#endif + #endif /* _TCP_H */ diff --git a/include/net/tls.h b/include/net/tls.h index b400d0bb7448..ee78f339b4b3 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -83,21 +83,10 @@ struct tls_device { void (*unhash)(struct tls_device *device, struct sock *sk); }; -struct tls_sw_context { +struct tls_sw_context_tx { struct crypto_aead *aead_send; - struct crypto_aead *aead_recv; struct crypto_wait async_wait; - /* Receive context */ - struct strparser strp; - void (*saved_data_ready)(struct sock *sk); - unsigned int (*sk_poll)(struct file *file, struct socket *sock, - struct poll_table_struct *wait); - struct sk_buff *recv_pkt; - u8 control; - bool decrypted; - - /* Sending context */ char aad_space[TLS_AAD_SPACE_SIZE]; unsigned int sg_plaintext_size; @@ -114,6 +103,50 @@ struct tls_sw_context { struct scatterlist sg_aead_out[2]; }; +struct tls_sw_context_rx { + struct crypto_aead *aead_recv; + struct crypto_wait async_wait; + + struct strparser strp; + void (*saved_data_ready)(struct sock *sk); + unsigned int (*sk_poll)(struct file *file, struct socket *sock, + struct poll_table_struct *wait); + struct sk_buff *recv_pkt; + u8 control; + bool decrypted; +}; + +struct tls_record_info { + struct list_head list; + u32 end_seq; + int len; + int num_frags; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct tls_offload_context { + struct crypto_aead *aead_send; + spinlock_t lock; /* protects records list */ + struct list_head records_list; + struct tls_record_info *open_record; + struct tls_record_info *retransmit_hint; + u64 hint_record_sn; + u64 unacked_record_sn; + + struct scatterlist sg_tx_data[MAX_SKB_FRAGS]; + void 
(*sk_destruct)(struct sock *sk); + u8 driver_state[]; + /* The TLS layer reserves room for driver specific state + * Currently the belief is that there is not enough + * driver specific state to justify another layer of indirection + */ +#define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *))) +}; + +#define TLS_OFFLOAD_CONTEXT_SIZE \ + (ALIGN(sizeof(struct tls_offload_context), sizeof(void *)) + \ + TLS_DRIVER_STATE_SIZE) + enum { TLS_PENDING_CLOSED_RECORD }; @@ -138,9 +171,15 @@ struct tls_context { struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128; }; - void *priv_ctx; + struct list_head list; + struct net_device *netdev; + refcount_t refcount; + + void *priv_ctx_tx; + void *priv_ctx_rx; - u8 conf:3; + u8 tx_conf:3; + u8 rx_conf:3; struct cipher_context tx; struct cipher_context rx; @@ -178,7 +217,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); -void tls_sw_free_resources(struct sock *sk); +void tls_sw_free_resources_tx(struct sock *sk); +void tls_sw_free_resources_rx(struct sock *sk); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); unsigned int tls_sw_poll(struct file *file, struct socket *sock, @@ -187,9 +227,28 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); -void tls_icsk_clean_acked(struct sock *sk); +int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tls_device_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); +void tls_device_sk_destruct(struct sock *sk); +void tls_device_init(void); +void tls_device_cleanup(void); +struct tls_record_info *tls_get_record(struct tls_offload_context *context, + u32 seq, u64 *p_record_sn); + +static inline bool tls_record_is_start_marker(struct tls_record_info *rec) +{ + return rec->len == 0; +} + +static inline u32 tls_record_start_seq(struct tls_record_info *rec) +{ + return rec->end_seq - rec->len; +} + +void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); int tls_push_sg(struct sock *sk, struct tls_context *ctx, struct scatterlist *sg, u16 first_offset, int flags); @@ -226,6 +285,13 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) return tls_ctx->pending_open_record_frags; } +static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) +{ + return sk_fullsock(sk) && + /* matches smp_store_release in tls_set_device_offload */ + smp_load_acquire(&sk->sk_destruct) == &tls_device_sk_destruct; +} + static inline void tls_err_abort(struct sock *sk, int err) { sk->sk_err = err; @@ -298,16 +364,22 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk) return icsk->icsk_ulp_data; } -static inline struct tls_sw_context *tls_sw_ctx( +static inline struct tls_sw_context_rx *tls_sw_ctx_rx( const struct tls_context *tls_ctx) { - return (struct tls_sw_context *)tls_ctx->priv_ctx; + return (struct tls_sw_context_rx *)tls_ctx->priv_ctx_rx; +} + +static inline struct tls_sw_context_tx *tls_sw_ctx_tx( + const struct tls_context *tls_ctx) +{ + return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx; } static inline struct tls_offload_context *tls_offload_ctx( const struct tls_context 
*tls_ctx) { - return (struct tls_offload_context *)tls_ctx->priv_ctx; + return (struct tls_offload_context *)tls_ctx->priv_ctx_tx; } int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, @@ -315,4 +387,12 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, void tls_register_device(struct tls_device *device); void tls_unregister_device(struct tls_device *device); +struct sk_buff *tls_validate_xmit_skb(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb); + +int tls_sw_fallback_init(struct sock *sk, + struct tls_offload_context *offload_ctx, + struct tls_crypto_info *crypto_info); + #endif /* _TLS_OFFLOAD_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 0676b272f6ac..05990746810e 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -174,6 +174,10 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, struct udphdr *uh, udp_lookup_t lookup); int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); +struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, + netdev_features_t features, + unsigned int mss, __sum16 check); + static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { struct udphdr *uh; @@ -269,6 +273,7 @@ int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); +int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index ad73d8b3fcc2..b99a02ae3934 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -262,6 +262,7 @@ struct vxlan_dev { #define VXLAN_F_COLLECT_METADATA 0x2000 #define VXLAN_F_GPE 0x4000 #define VXLAN_F_IPV6_LINKLOCAL 0x8000 +#define VXLAN_F_TTL_INHERIT 0x10000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable diff --git a/include/net/xdp.h b/include/net/xdp.h index b2362ddfa694..137ad5f9f40f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -33,16 +33,99 @@ * also mandatory during RX-ring setup. */ +enum xdp_mem_type { + MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */ + MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */ + MEM_TYPE_PAGE_POOL, + MEM_TYPE_MAX, +}; + +struct xdp_mem_info { + u32 type; /* enum xdp_mem_type, but known size type */ + u32 id; +}; + +struct page_pool; + struct xdp_rxq_info { struct net_device *dev; u32 queue_index; u32 reg_state; + struct xdp_mem_info mem; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ +struct xdp_buff { + void *data; + void *data_end; + void *data_meta; + void *data_hard_start; + struct xdp_rxq_info *rxq; +}; + +struct xdp_frame { + void *data; + u16 len; + u16 headroom; + u16 metasize; + /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time, + * while mem info is valid on remote CPU. + */ + struct xdp_mem_info mem; + struct net_device *dev_rx; /* used by cpumap */ +}; + +/* Convert xdp_buff to xdp_frame */ +static inline +struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) +{ + struct xdp_frame *xdp_frame; + int metasize; + int headroom; + + /* Assure headroom is available for storing info */ + headroom = xdp->data - xdp->data_hard_start; + metasize = xdp->data - xdp->data_meta; + metasize = metasize > 0 ? 
metasize : 0; + if (unlikely((headroom - metasize) < sizeof(*xdp_frame))) + return NULL; + + /* Store info in top of packet */ + xdp_frame = xdp->data_hard_start; + + xdp_frame->data = xdp->data; + xdp_frame->len = xdp->data_end - xdp->data; + xdp_frame->headroom = headroom - sizeof(*xdp_frame); + xdp_frame->metasize = metasize; + + /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ + xdp_frame->mem = xdp->rxq->mem; + + return xdp_frame; +} + +void xdp_return_frame(struct xdp_frame *xdpf); + int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, struct net_device *dev, u32 queue_index); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); +int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + enum xdp_mem_type type, void *allocator); + +/* Drivers not supporting XDP metadata can use this helper, which + * rejects any room expansion for metadata as a result. + */ +static __always_inline void +xdp_set_data_meta_invalid(struct xdp_buff *xdp) +{ + xdp->data_meta = xdp->data + 1; +} + +static __always_inline bool +xdp_data_meta_unsupported(const struct xdp_buff *xdp) +{ + return unlikely(xdp->data_meta > xdp->data); +} #endif /* __LINUX_NET_XDP_H__ */ diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 878b2be7ce77..c1a5284713b8 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -10,6 +10,7 @@ #include <linux/tracepoint.h> #include <net/ipv6.h> #include <net/tcp.h> +#include <linux/sock_diag.h> #define TP_STORE_V4MAPPED(__entry, saddr, daddr) \ do { \ @@ -113,7 +114,7 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset, */ DECLARE_EVENT_CLASS(tcp_event_sk, - TP_PROTO(const struct sock *sk), + TP_PROTO(struct sock *sk), TP_ARGS(sk), @@ -125,6 +126,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk, __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) + __field(__u64, sock_cookie) ), TP_fast_assign( @@ -144,73 +146,36 @@ DECLARE_EVENT_CLASS(tcp_event_sk, TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); + + __entry->sock_cookie = sock_gen_cookie(sk); ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, - __entry->saddr_v6, __entry->daddr_v6) + __entry->saddr_v6, __entry->daddr_v6, + __entry->sock_cookie) ); DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, - TP_PROTO(const struct sock *sk), + TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, - TP_PROTO(const struct sock *sk), + TP_PROTO(struct sock *sk), TP_ARGS(sk) ); -TRACE_EVENT(tcp_set_state, - - TP_PROTO(const struct sock *sk, const int oldstate, const int newstate), - - TP_ARGS(sk, oldstate, newstate), - - TP_STRUCT__entry( - __field(const void *, skaddr) - __field(int, oldstate) - __field(int, newstate) - __field(__u16, sport) - __field(__u16, dport) - __array(__u8, saddr, 4) - __array(__u8, daddr, 4) - __array(__u8, saddr_v6, 16) - __array(__u8, daddr_v6, 16) - ), - - TP_fast_assign( - struct inet_sock *inet = inet_sk(sk); - __be32 *p32; - - __entry->skaddr = sk; - __entry->oldstate = oldstate; - __entry->newstate = newstate; - - __entry->sport = ntohs(inet->inet_sport); - __entry->dport = ntohs(inet->inet_dport); +DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, - 
p32 = (__be32 *) __entry->saddr; - *p32 = inet->inet_saddr; + TP_PROTO(struct sock *sk), - p32 = (__be32 *) __entry->daddr; - *p32 = inet->inet_daddr; - - TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, - sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); - ), - - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", - __entry->sport, __entry->dport, - __entry->saddr, __entry->daddr, - __entry->saddr_v6, __entry->daddr_v6, - show_tcp_state_name(__entry->oldstate), - show_tcp_state_name(__entry->newstate)) + TP_ARGS(sk) ); TRACE_EVENT(tcp_retransmit_synack, @@ -279,6 +244,7 @@ TRACE_EVENT(tcp_probe, __field(__u32, snd_wnd) __field(__u32, srtt) __field(__u32, rcv_wnd) + __field(__u64, sock_cookie) ), TP_fast_assign( @@ -303,15 +269,14 @@ TRACE_EVENT(tcp_probe, __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); __entry->srtt = tp->srtt_us >> 3; + __entry->sock_cookie = sock_gen_cookie(sk); ), - TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x " - "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u " - "rcv_wnd=%u", + TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx", __entry->saddr, __entry->daddr, __entry->mark, __entry->length, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, - __entry->srtt, __entry->rcv_wnd) + __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie) ); #endif /* _TRACE_TCP_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c5ec89732a8d..da77a9388947 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -95,6 +95,7 @@ enum bpf_cmd { BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, }; enum bpf_map_type { @@ -279,6 +280,9 @@ union bpf_attr { */ char map_name[BPF_OBJ_NAME_LEN]; __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_id; /* BTF type_id of the key */ + __u32 btf_value_id; /* BTF type_id of the value */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -363,398 +367,1406 @@ union bpf_attr { __u64 name; __u32 prog_fd; } raw_tracepoint; + + struct { /* anonymous struct for BPF_BTF_LOAD */ + __aligned_u64 btf; + __aligned_u64 btf_log_buf; + __u32 btf_size; + __u32 btf_log_size; + __u32 btf_log_level; + }; } __attribute__((aligned(8))); -/* BPF helper function descriptions: - * - * void *bpf_map_lookup_elem(&map, &key) - * Return: Map value or NULL - * - * int bpf_map_update_elem(&map, &key, &value, flags) - * Return: 0 on success or negative error - * - * int bpf_map_delete_elem(&map, &key) - * Return: 0 on success or negative error - * - * int bpf_probe_read(void *dst, int size, void *src) - * Return: 0 on success or negative error +/* The description below is an attempt at providing documentation to eBPF + * developers about the multiple available eBPF helper functions. It can be + * parsed and used to produce a manual page. The workflow is the following, + * and requires the rst2man utility: + * + * $ ./scripts/bpf_helpers_doc.py \ + * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst + * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 + * $ man /tmp/bpf-helpers.7 + * + * Note that in order to produce this external documentation, some RST + * formatting is used in the descriptions to get "bold" and "italics" in + * manual pages. 
Also note that the few trailing white spaces are + * intentional, removing them would break paragraphs for rst2man. + * + * Start of BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) + * Description + * Perform a lookup in *map* for an entry associated to *key*. + * Return + * Map value associated to *key*, or **NULL** if no entry was + * found. + * + * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * Description + * Add or update the value of the entry associated to *key* in + * *map* with *value*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * Flag value **BPF_NOEXIST** cannot be used for maps of types + * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all + * elements always exist), the helper would return an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * Description + * Delete entry with *key* from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read(void *dst, u32 size, const void *src) + * Description + * For tracing programs, safely attempt to read *size* bytes from + * address *src* and store the data in *dst*. + * Return + * 0 on success, or a negative error in case of failure. * * u64 bpf_ktime_get_ns(void) - * Return: current ktime - * - * int bpf_trace_printk(const char *fmt, int fmt_size, ...) - * Return: length of buffer written or negative error - * - * u32 bpf_prandom_u32(void) - * Return: random value - * - * u32 bpf_raw_smp_processor_id(void) - * Return: SMP processor ID - * - * int bpf_skb_store_bytes(skb, offset, from, len, flags) - * store bytes into packet - * @skb: pointer to skb - * @offset: offset within packet from skb->mac_header - * @from: pointer where to copy bytes from - * @len: number of bytes to store into packet - * @flags: bit 0 - if true, recompute skb->csum - * other bits - reserved - * Return: 0 on success or negative error - * - * int bpf_l3_csum_replace(skb, offset, from, to, flags) - * recompute IP checksum - * @skb: pointer to skb - * @offset: offset within packet where IP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * other bits - reserved - * Return: 0 on success or negative error - * - * int bpf_l4_csum_replace(skb, offset, from, to, flags) - * recompute TCP/UDP checksum - * @skb: pointer to skb - * @offset: offset within packet where TCP/UDP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * bit 4 - is pseudo header - * other bits - reserved - * Return: 0 on success or negative error - * - * int bpf_tail_call(ctx, prog_array_map, index) - * jump into another BPF program - * @ctx: context pointer passed to next program - * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: 32-bit index inside array that selects specific program to run - * Return: 0 on success or negative error - * - * int bpf_clone_redirect(skb, ifindex, flags) - * redirect to another netdev - * @skb: pointer to skb - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead 
of egress - * other bits - reserved - * Return: 0 on success or negative error + * Description + * Return the time elapsed since system boot, in nanoseconds. + * Return + * Current *ktime*. + * + * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * Description + * This helper is a "printk()-like" facility for debugging. It + * prints a message defined by format *fmt* (of size *fmt_size*) + * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * available. It can take up to three additional **u64** + * arguments (as with all eBPF helpers, the total number of arguments is + * limited to five). + * + * Each time the helper is called, it appends a line to the trace. + * The format of the trace is customizable, and the exact output + * one will get depends on the options set in + * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *README* file under the same directory). However, it usually + * defaults to something like: + * + * :: + * + * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg> + * + * In the above: + * + * * ``telnet`` is the name of the current task. + * * ``470`` is the PID of the current task. + * * ``001`` is the CPU number on which the task is + * running. + * * In ``.N..``, each character refers to a set of + * options (whether irqs are enabled, scheduling + * options, whether hard/softirqs are running, level of + * preempt_disabled respectively). **N** means that + * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** + * are set. + * * ``419421.045894`` is a timestamp. + * * ``0x00000001`` is a fake value used by BPF for the + * instruction pointer register. + * * ``<formatted msg>`` is the message formatted with + * *fmt*. + * + * The conversion specifiers supported by *fmt* are similar, but + * more limited than for printk(). They are **%d**, **%i**, + * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, + * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size + * of field, padding with zeroes, etc.) is available, and the + * helper will return **-EINVAL** (but print nothing) if it + * encounters an unknown specifier. + * + * Also, note that **bpf_trace_printk**\ () is slow, and should + * only be used for debugging purposes. For this reason, a notice + * block (spanning several lines) is printed to kernel logs and + * states that the helper should not be used "for production use" + * the first time this helper is used (or more precisely, when + * **trace_printk**\ () buffers are allocated). For passing values + * to user space, perf events should be preferred. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. + * + * u32 bpf_get_prandom_u32(void) + * Description + * Get a pseudo-random number. + * + * From a security point of view, this helper uses its own + * pseudo-random internal state, and cannot be used to infer the + * seed of other random functions in the kernel. However, it is + * essential to note that the generator used by the helper is not + * cryptographically secure. + * Return + * A random 32-bit unsigned value. + * + * u32 bpf_get_smp_processor_id(void) + * Description + * Get the SMP (symmetric multiprocessing) processor id. Note that + * all programs run with preemption disabled, which means that the + * SMP processor id is stable throughout the execution of the + * program. + * Return + * The SMP id of the processor running the program.
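An illustrative eBPF program fragment (restricted C, clang BPF target) tying the helpers documented above together; the helper prototypes are assumed to come from the usual BPF helper headers rather than from this file:

static int trace_cpu(void *ctx)
{
	char fmt[] = "running on cpu %u\n";
	u32 cpu = bpf_get_smp_processor_id();

	/* Debug only: output lands in /sys/kernel/debug/tracing/trace */
	return bpf_trace_printk(fmt, sizeof(fmt), cpu);
}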
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ *	Description
+ *		Store *len* bytes from address *from* into the packet
+ *		associated to *skb*, at *offset*. *flags* are a combination of
+ *		**BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ *		checksum for the packet after storing the bytes) and
+ *		**BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ *		**->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ *	Description
+ *		Recompute the layer 3 (e.g. IP) checksum for the packet
+ *		associated to *skb*. Computation is incremental, so the helper
+ *		must know the former value of the header field that was
+ *		modified (*from*), the new value of this field (*to*), and the
+ *		number of bytes (2 or 4) for this field, stored in *size*.
+ *		Alternatively, it is possible to store the difference between
+ *		the previous and the new values of the header field in *to*, by
+ *		setting *from* and *size* to 0. For both methods, *offset*
+ *		indicates the location of the IP checksum within the packet.
+ *
+ *		This helper works in combination with **bpf_csum_diff**\ (),
+ *		which does not update the checksum in-place, but offers more
+ *		flexibility and can handle sizes larger than 2 or 4 for the
+ *		checksum to update.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ *	Description
+ *		Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ *		packet associated to *skb*. Computation is incremental, so the
+ *		helper must know the former value of the header field that was
+ *		modified (*from*), the new value of this field (*to*), and the
+ *		number of bytes (2 or 4) for this field, stored on the lowest
+ *		four bits of *flags*. Alternatively, it is possible to store
+ *		the difference between the previous and the new values of the
+ *		header field in *to*, by setting *from* and the four lowest
+ *		bits of *flags* to 0. For both methods, *offset* indicates the
+ *		location of the TCP/UDP checksum within the packet. In
+ *		addition to the size of the field, actual flags can be added
+ *		to *flags* (with a bitwise OR). With
+ *		**BPF_F_MARK_MANGLED_0**, a null checksum is left untouched
+ *		(unless **BPF_F_MARK_ENFORCE** is added as well), and for
+ *		updates resulting in a null checksum the value is set to
+ *		**CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ *		the checksum is to be computed against a pseudo-header.
+ *
+ *		This helper works in combination with **bpf_csum_diff**\ (),
+ *		which does not update the checksum in-place, but offers more
+ *		flexibility and can handle sizes larger than 2 or 4 for the
+ *		checksum to update.
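+ *
+ *		For instance, a TC program rewriting the TCP destination port
+ *		could update the checksum as follows (a sketch: the offsets
+ *		assume an IPv4 packet with no IP options, and error checking
+ *		is omitted):
+ *
+ *		::
+ *
+ *			__be16 old_port, new_port = bpf_htons(8080);
+ *			int tcp = ETH_HLEN + sizeof(struct iphdr);
+ *
+ *			bpf_skb_load_bytes(skb, tcp + offsetof(struct tcphdr, dest),
+ *					   &old_port, sizeof(old_port));
+ *			bpf_l4_csum_replace(skb, tcp + offsetof(struct tcphdr, check),
+ *					    old_port, new_port, sizeof(new_port));
+ *			bpf_skb_store_bytes(skb, tcp + offsetof(struct tcphdr, dest),
+ *					    &new_port, sizeof(new_port), 0);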
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ *	Description
+ *		This special helper is used to trigger a "tail call", or in
+ *		other words, to jump into another eBPF program. The same stack
+ *		frame is used (but values on stack and in registers for the
+ *		caller are not accessible to the callee). This mechanism allows
+ *		for program chaining, either for raising the maximum number of
+ *		available eBPF instructions, or to execute given programs in
+ *		conditional blocks. For security reasons, there is an upper
+ *		limit to the number of successive tail calls that can be
+ *		performed.
+ *
+ *		Upon call of this helper, the program attempts to jump into a
+ *		program referenced at index *index* in *prog_array_map*, a
+ *		special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ *		*ctx*, a pointer to the context.
+ *
+ *		If the call succeeds, the kernel immediately runs the first
+ *		instruction of the new program. This is not a function call,
+ *		and it never returns to the previous program. If the call
+ *		fails, then the helper has no effect, and the caller continues
+ *		to run its subsequent instructions. A call can fail if the
+ *		destination program for the jump does not exist (i.e. *index*
+ *		is greater than or equal to the number of entries in
+ *		*prog_array_map*), or if the maximum number of tail calls has
+ *		been reached for this chain of programs. This limit is defined
+ *		in the kernel by the macro **MAX_TAIL_CALL_CNT** (not
+ *		accessible to user space), which is currently set to 32.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ *	Description
+ *		Clone and redirect the packet associated to *skb* to another
+ *		net device of index *ifindex*. Both ingress and egress
+ *		interfaces can be used for redirection. The **BPF_F_INGRESS**
+ *		value in *flags* is used to make the distinction (ingress path
+ *		is selected if the flag is present, egress path otherwise).
+ *		This is the only flag supported for now.
+ *
+ *		In comparison with the **bpf_redirect**\ () helper,
+ *		**bpf_clone_redirect**\ () has the associated cost of
+ *		duplicating the packet buffer, but it can be executed directly
+ *		from within the eBPF program. Conversely, **bpf_redirect**\ ()
+ *		is more efficient, but it is handled through an action code
+ *		where the redirection happens only after the eBPF program has
+ *		returned.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
 *
 * u64 bpf_get_current_pid_tgid(void)
- *	Return: current->tgid << 32 | current->pid
+ *	Return
+ *		A 64-bit integer containing the current tgid and pid, and
+ *		created as such:
+ *		*current_task*\ **->tgid << 32 \|**
+ *		*current_task*\ **->pid**.
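+ *
+ *		A short sketch of how the two values can be split:
+ *
+ *		::
+ *
+ *			u64 id = bpf_get_current_pid_tgid();
+ *			u32 tgid = id >> 32;	/* thread group id */
+ *			u32 pid = (u32)id;	/* thread id */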
 *
 * u64 bpf_get_current_uid_gid(void)
- *	Return: current_gid << 32 | current_uid
- *
- * int bpf_get_current_comm(char *buf, int size_of_buf)
- *	stores current->comm into buf
- *	Return: 0 on success or negative error
- *
- * u32 bpf_get_cgroup_classid(skb)
- *	retrieve a proc's classid
- *	@skb: pointer to skb
- *	Return: classid if != 0
- *
- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
- *	Return: 0 on success or negative error
- *
- * int bpf_skb_vlan_pop(skb)
- *	Return: 0 on success or negative error
- *
- * int bpf_skb_get_tunnel_key(skb, key, size, flags)
- * int bpf_skb_set_tunnel_key(skb, key, size, flags)
- *	retrieve or populate tunnel metadata
- *	@skb: pointer to skb
- *	@key: pointer to 'struct bpf_tunnel_key'
- *	@size: size of 'struct bpf_tunnel_key'
- *	@flags: room for future extensions
- *	Return: 0 on success or negative error
- *
- * u64 bpf_perf_event_read(map, flags)
- *	read perf event counter value
- *	@map: pointer to perf_event_array map
- *	@flags: index of event in the map or bitmask flags
- *	Return: value of perf event counter read or error code
- *
- * int bpf_redirect(ifindex, flags)
- *	redirect to another netdev
- *	@ifindex: ifindex of the net device
- *	@flags:
- *		cls_bpf:
- *			bit 0 - if set, redirect to ingress instead of egress
- *			other bits - reserved
- *		xdp_bpf:
- *			all bits - reserved
- *	Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- *		xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- * int bpf_redirect_map(map, key, flags)
- *	redirect to endpoint in map
- *	@map: pointer to dev map
- *	@key: index in map to lookup
- *	@flags: --
- *	Return: XDP_REDIRECT on success or XDP_ABORT on error
- *
- * u32 bpf_get_route_realm(skb)
- *	retrieve a dst's tclassid
- *	@skb: pointer to skb
- *	Return: realm if != 0
- *
- * int bpf_perf_event_output(ctx, map, flags, data, size)
- *	output perf raw sample
- *	@ctx: struct pt_regs*
- *	@map: pointer to perf_event_array map
- *	@flags: index of event in the map or bitmask flags
- *	@data: data on stack to be output as raw data
- *	@size: size of data
- *	Return: 0 on success or negative error
- *
- * int bpf_get_stackid(ctx, map, flags)
- *	walk user or kernel stack and return id
- *	@ctx: struct pt_regs*
- *	@map: pointer to stack_trace map
- *	@flags: bits 0-7 - numer of stack frames to skip
- *		bit 8 - collect user stack instead of kernel
- *		bit 9 - compare stacks by hash only
- *		bit 10 - if two different stacks hash into the same stackid
- *			 discard old
- *		other bits - reserved
- *	Return: >= 0 stackid on success or negative error
- *
- * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
- *	calculate csum diff
- *	@from: raw from buffer
- *	@from_size: length of from buffer
- *	@to: raw to buffer
- *	@to_size: length of to buffer
- *	@seed: optional seed
- *	Return: csum result or negative error code
- *
- * int bpf_skb_get_tunnel_opt(skb, opt, size)
- *	retrieve tunnel options metadata
- *	@skb: pointer to skb
- *	@opt: pointer to raw tunnel option data
- *	@size: size of @opt
- *	Return: option size
- *
- * int bpf_skb_set_tunnel_opt(skb, opt, size)
- *	populate tunnel options metadata
- *	@skb: pointer to skb
- *	@opt: pointer to raw tunnel option data
- *	@size: size of @opt
- *	Return: 0 on success or negative error
- *
- * int bpf_skb_change_proto(skb, proto, flags)
- *	Change protocol of the skb. Currently supported is v4 -> v6,
- *	v6 -> v4 transitions. The helper will also resize the skb. eBPF
- *	program is expected to fill the new headers via skb_store_bytes
- *	and lX_csum_replace.
- *	@skb: pointer to skb
- *	@proto: new skb->protocol type
- *	@flags: reserved
- *	Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- *	Change packet type of skb.
- *	@skb: pointer to skb
- *	@type: new skb->pkt_type type
- *	Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- *	Check cgroup2 membership of skb
- *	@skb: pointer to skb
- *	@map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *	@index: index of the cgroup in the bpf_map
- *	Return:
- *		== 0 skb failed the cgroup2 descendant test
- *		== 1 skb succeeded the cgroup2 descendant test
- *		 < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- *	Retrieve and possibly recalculate skb->hash.
- *	@skb: pointer to skb
- *	Return: hash
+ *	Return
+ *		A 64-bit integer containing the current GID and UID, and
+ *		created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ *	Description
+ *		Copy the **comm** attribute of the current task into *buf* of
+ *		size *size_of_buf*. The **comm** attribute contains the name of
+ *		the executable (excluding the path) for the current task. The
+ *		*size_of_buf* must be strictly positive. On success, the
+ *		helper makes sure that the *buf* is NUL-terminated. On failure,
+ *		it is filled with zeroes.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ *	Description
+ *		Retrieve the classid for the current task, i.e. for the net_cls
+ *		cgroup to which *skb* belongs.
+ *
+ *		This helper can be used on the TC egress path, but not on
+ *		ingress.
+ *
+ *		The net_cls cgroup provides an interface to tag network packets
+ *		based on a user-provided identifier for all traffic coming from
+ *		the tasks belonging to the related cgroup. See also the related
+ *		kernel documentation, available from the Linux sources in file
+ *		*Documentation/cgroup-v1/net_cls.txt*.
+ *
+ *		The Linux kernel has two versions of cgroups: there are
+ *		cgroups v1 and cgroups v2. Both are available to users, who can
+ *		use a mixture of them, but note that the net_cls cgroup is for
+ *		cgroup v1 only. This makes it incompatible with BPF programs
+ *		run on cgroups, which is a cgroup-v2-only feature (a socket can
+ *		only hold data for one version of cgroups at a time).
+ *
+ *		This helper is only available if the kernel was compiled with
+ *		the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ *		"**y**" or to "**m**".
+ *	Return
+ *		The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ *	Description
+ *		Push a *vlan_tci* (VLAN tag control information) of protocol
+ *		*vlan_proto* to the packet associated to *skb*, then update
+ *		the checksum. Note that if *vlan_proto* is different from
+ *		**ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ *		be **ETH_P_8021Q**.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
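+ *
+ *		A sketch for a TC classifier (the VLAN id 42 is arbitrary):
+ *
+ *		::
+ *
+ *			if (bpf_skb_vlan_push(skb, bpf_htons(ETH_P_8021Q), 42))
+ *				return TC_ACT_SHOT;
+ *			/* packet pointers must be revalidated from here on */
+ *			return TC_ACT_OK;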
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ *	Description
+ *		Pop a VLAN header from the packet associated to *skb*.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ *	Description
+ *		Get tunnel metadata. This helper takes a pointer *key* to an
+ *		empty **struct bpf_tunnel_key** of *size*, that will be
+ *		filled with tunnel metadata for the packet associated to *skb*.
+ *		The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ *		indicates that the tunnel is based on IPv6 protocol instead of
+ *		IPv4.
+ *
+ *		The **struct bpf_tunnel_key** is an object that generalizes the
+ *		principal parameters used by various tunneling protocols into a
+ *		single struct. This way, it can be used to easily make a
+ *		decision based on the contents of the encapsulation header,
+ *		"summarized" in this struct. In particular, it holds the IP
+ *		address of the remote end (IPv4 or IPv6, depending on the case)
+ *		in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ *		this struct exposes the *key*\ **->tunnel_id**, which is
+ *		generally mapped to a VNI (Virtual Network Identifier), making
+ *		it programmable together with the **bpf_skb_set_tunnel_key**\
+ *		() helper.
+ *
+ *		Let's imagine that the following code is part of a program
+ *		attached to the TC ingress interface, on one end of a GRE
+ *		tunnel, and is supposed to filter out all messages coming from
+ *		remote ends with IPv4 address other than 10.0.0.1:
+ *
+ *		::
+ *
+ *			int ret;
+ *			struct bpf_tunnel_key key = {};
+ *
+ *			ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ *			if (ret < 0)
+ *				return TC_ACT_SHOT;	// drop packet
+ *
+ *			if (key.remote_ipv4 != 0x0a000001)
+ *				return TC_ACT_SHOT;	// drop packet
+ *
+ *			return TC_ACT_OK;		// accept packet
+ *
+ *		This interface can also be used with all encapsulation devices
+ *		that can operate in "collect metadata" mode: instead of having
+ *		one network device per specific configuration, the "collect
+ *		metadata" mode only requires a single device where the
+ *		configuration can be extracted from this helper.
+ *
+ *		This can be used together with various tunnels such as VXLAN,
+ *		Geneve, GRE or IP in IP (IPIP).
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ *	Description
+ *		Populate tunnel metadata for the packet associated to *skb*.
+ *		The tunnel metadata is set to the contents of *key*, of *size*.
+ *		The *flags* can be set to a combination of the following
+ *		values:
+ *
+ *		**BPF_F_TUNINFO_IPV6**
+ *			Indicate that the tunnel is based on IPv6 protocol
+ *			instead of IPv4.
+ *		**BPF_F_ZERO_CSUM_TX**
+ *			For IPv4 packets, add a flag to tunnel metadata
+ *			indicating that checksum computation should be skipped
+ *			and checksum set to zeroes.
+ *		**BPF_F_DONT_FRAGMENT**
+ *			Add a flag to tunnel metadata indicating that the
+ *			packet should not be fragmented.
+ *		**BPF_F_SEQ_NUMBER**
+ *			Add a flag to tunnel metadata indicating that a
+ *			sequence number should be added to tunnel header before
+ *			sending the packet. This flag was added for GRE
+ *			encapsulation, but might be used with other protocols
+ *			as well in the future.
+ *
+ *		Here is a typical usage on the transmit path:
+ *
+ *		::
+ *
+ *			struct bpf_tunnel_key key;
+ *			/* ... populate key ... */
+ *			bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ *			bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ *		See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ *		helper for additional information.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ *	Description
+ *		Read the value of a perf event counter. This helper relies on a
+ *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ *		the perf event counter is selected when *map* is updated with
+ *		perf event file descriptors. The *map* is an array whose size
+ *		is the number of available CPUs, and each cell contains a value
+ *		relative to one CPU. The value to retrieve is indicated by
+ *		*flags*, that contains the index of the CPU to look up, masked
+ *		with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ *		**BPF_F_CURRENT_CPU** to indicate that the value for the
+ *		current CPU should be retrieved.
+ *
+ *		Note that before Linux 4.13, only hardware perf events can be
+ *		retrieved.
+ *
+ *		Also, be aware that the newer helper
+ *		**bpf_perf_event_read_value**\ () is recommended over
+ *		**bpf_perf_event_read**\ () in general. The latter has some ABI
+ *		quirks where error and counter value are used as a return code
+ *		(which is wrong to do since ranges may overlap). This issue is
+ *		fixed with **bpf_perf_event_read_value**\ (), which at the same
+ *		time provides more features over the **bpf_perf_event_read**\
+ *		() interface. Please refer to the description of
+ *		**bpf_perf_event_read_value**\ () for details.
+ *	Return
+ *		The value of the perf event counter read from the map, or a
+ *		negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ *	Description
+ *		Redirect the packet to another net device of index *ifindex*.
+ *		This helper is somewhat similar to **bpf_clone_redirect**\
+ *		(), except that the packet is not cloned, which provides
+ *		increased performance.
+ *
+ *		Except for XDP, both ingress and egress interfaces can be used
+ *		for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ *		to make the distinction (ingress path is selected if the flag
+ *		is present, egress path otherwise). Currently, XDP only
+ *		supports redirection to the egress interface, and accepts no
+ *		flag at all.
+ *
+ *		The same effect can be attained with the more generic
+ *		**bpf_redirect_map**\ (), which requires specific maps to be
+ *		used but offers better performance.
+ *	Return
+ *		For XDP, the helper returns **XDP_REDIRECT** on success or
+ *		**XDP_ABORTED** on error. For other program types, the values
+ *		are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ *		error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ *	Description
+ *		Retrieve the realm of the route, that is to say the
+ *		**tclassid** field of the destination for the *skb*. The
+ *		identifier retrieved is a user-provided tag, similar to the
+ *		one used with the net_cls cgroup (see description for
+ *		**bpf_get_cgroup_classid**\ () helper), but here this tag is
+ *		held by a route (a destination entry), not by a task.
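+ *
+ *		For example, a hypothetical egress classifier could drop
+ *		traffic for a given realm (realm number 7 is arbitrary, and
+ *		would have been set with **ip route ... realm 7**):
+ *
+ *		::
+ *
+ *			if (bpf_get_route_realm(skb) == 7)	/* assumed realm */
+ *				return TC_ACT_SHOT;
+ *			return TC_ACT_OK;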
+ *
+ *		Retrieving this identifier works with the clsact TC egress
+ *		hook (see also **tc-bpf(8)**), or alternatively on conventional
+ *		classful egress qdiscs, but not on TC ingress path. In the case
+ *		of the clsact TC egress hook, this has the advantage that,
+ *		internally, the destination entry has not been dropped yet in
+ *		the transmit path. Therefore, the destination entry does not
+ *		need to be artificially held via **netif_keep_dst**\ () for a
+ *		classful qdisc until the *skb* is freed.
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		the **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ *	Return
+ *		The realm of the route for the packet associated to *skb*, or 0
+ *		if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *	Description
+ *		Write raw *data* blob into a special BPF perf event held by
+ *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *		event must have the following attributes: **PERF_SAMPLE_RAW**
+ *		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *		The *flags* are used to indicate the index in *map* for which
+ *		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *		to indicate that the index of the current CPU core should be
+ *		used.
+ *
+ *		The value to write, of *size*, is passed through eBPF stack and
+ *		pointed by *data*.
+ *
+ *		The context of the program *ctx* also needs to be passed to
+ *		the helper.
+ *
+ *		In user space, a program willing to read the values needs to
+ *		call **perf_event_open**\ () on the perf event (either for
+ *		one or for all CPUs) and to store the file descriptor into the
+ *		*map*. This must be done before the eBPF program can send data
+ *		into it. An example is available in file
+ *		*samples/bpf/trace_output_user.c* in the Linux kernel source
+ *		tree (the eBPF program counterpart is in
+ *		*samples/bpf/trace_output_kern.c*).
+ *
+ *		**bpf_perf_event_output**\ () achieves better performance
+ *		than **bpf_trace_printk**\ () for sharing data with user
+ *		space, and is much better suited to streaming data from eBPF
+ *		programs.
+ *
+ *		Note that this helper is not restricted to tracing use cases
+ *		and can be used with programs attached to TC or XDP as well,
+ *		where it allows for passing data to user space listeners. Data
+ *		can be:
+ *
+ *		* Only custom structs,
+ *		* Only the packet payload, or
+ *		* A combination of both.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ *	Description
+ *		This helper was provided as an easy way to load data from a
+ *		packet. It can be used to load *len* bytes from *offset* from
+ *		the packet associated to *skb*, into the buffer pointed to by
+ *		*to*.
+ *
+ *		Since Linux 4.7, usage of this helper has mostly been replaced
+ *		by "direct packet access", enabling packet data to be
+ *		manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ *		pointing respectively to the first byte of packet data and to
+ *		the byte after the last byte of packet data. However, it
+ *		remains useful if one wishes to read large quantities of data
+ *		at once from a packet into the eBPF stack.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
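+ *
+ *		A sketch reading the IPv4 protocol field (assuming, without
+ *		checking it, a plain Ethernet/IPv4 packet):
+ *
+ *		::
+ *
+ *			u8 ip_proto;
+ *
+ *			if (bpf_skb_load_bytes(skb,
+ *					       ETH_HLEN + offsetof(struct iphdr, protocol),
+ *					       &ip_proto, sizeof(ip_proto)) < 0)
+ *				return TC_ACT_SHOT;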
+ *
+ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
+ *	Description
+ *		Walk a user or a kernel stack and return its id. To achieve
+ *		this, the helper needs *ctx*, which is a pointer to the context
+ *		on which the tracing program is executed, and a pointer to a
+ *		*map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		a combination of the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_FAST_STACK_CMP**
+ *			Compare stacks by hash only.
+ *		**BPF_F_REUSE_STACKID**
+ *			If two different stacks hash into the same *stackid*,
+ *			discard the old one.
+ *
+ *		The stack id retrieved is a 32-bit integer handle which
+ *		can be further combined with other data (including other stack
+ *		ids) and used as a key into maps. This can be useful for
+ *		generating a variety of graphs (such as flame graphs or off-cpu
+ *		graphs).
+ *
+ *		For walking a stack, this helper is an improvement over
+ *		**bpf_probe_read**\ (), which can be used with unrolled loops
+ *		but is not efficient and consumes a lot of eBPF instructions.
+ *		Instead, **bpf_get_stackid**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** kernel and user frames. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *		::
+ *
+ *			# sysctl kernel.perf_event_max_stack=<new value>
+ *
+ *	Return
+ *		The positive or null stack id on success, or a negative error
+ *		in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ *	Description
+ *		Compute a checksum difference, from the raw buffer pointed by
+ *		*from*, of length *from_size* (that must be a multiple of 4),
+ *		towards the raw buffer pointed by *to*, of size *to_size*
+ *		(same remark). An optional *seed* can be added to the value
+ *		(this can be cascaded: the seed may come from a previous call
+ *		to the helper).
+ *
+ *		This is flexible enough to be used in several ways:
+ *
+ *		* With *from_size* == 0, *to_size* > 0 and *seed* set to
+ *		  checksum, it can be used when pushing new data.
+ *		* With *from_size* > 0, *to_size* == 0 and *seed* set to
+ *		  checksum, it can be used when removing data from a packet.
+ *		* With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ *		  can be used to compute a diff. Note that *from_size* and
+ *		  *to_size* do not need to be equal.
+ *
+ *		This helper can be used in combination with
+ *		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ *		which one can feed in the difference computed with
+ *		**bpf_csum_diff**\ ().
+ *	Return
+ *		The checksum result, or a negative error code in case of
+ *		failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ *	Description
+ *		Retrieve tunnel options metadata for the packet associated to
+ *		*skb*, and store the raw tunnel option data to the buffer *opt*
+ *		of *size*.
+ *
+ *		This helper can be used with encapsulation devices that can
+ *		operate in "collect metadata" mode (please refer to the related
+ *		note in the description of **bpf_skb_get_tunnel_key**\ () for
+ *		more details). A particular example where this can be used is
+ *		in combination with the Geneve encapsulation protocol, where it
+ *		allows for pushing (with the **bpf_skb_set_tunnel_opt**\ ()
+ *		helper) and retrieving (with this helper) arbitrary TLVs
+ *		(Type-Length-Value headers) from the eBPF program. This allows
+ *		for full customization of these headers.
+ *	Return
+ *		The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ *	Description
+ *		Set tunnel options metadata for the packet associated to *skb*
+ *		to the option data contained in the raw buffer *opt* of *size*.
+ *
+ *		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ *		helper for additional information.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ *	Description
+ *		Change the protocol of the *skb* to *proto*. Currently
+ *		supported are transitions from IPv4 to IPv6, and from IPv6 to
+ *		IPv4. The helper takes care of the groundwork for the
+ *		transition, including resizing the socket buffer. The eBPF
+ *		program is expected to fill the new headers, if any, via
+ *		**bpf_skb_store_bytes**\ () and to recompute the checksums with
+ *		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ *		(). The main use case for this helper is to perform NAT64
+ *		operations out of an eBPF program.
+ *
+ *		Internally, the GSO type is marked as dodgy so that headers are
+ *		checked and segments are recalculated by the GSO/GRO engine.
+ *		The size for GSO target is adapted as well.
+ *
+ *		All values for *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ *	Description
+ *		Change the packet type for the packet associated to *skb*. This
+ *		comes down to setting *skb*\ **->pkt_type** to *type*, except
+ *		that the eBPF program does not have write access to *skb*\
+ *		**->pkt_type** besides this helper. Using a helper here allows
+ *		for graceful handling of errors.
+ *
+ *		The major use case is to change incoming *skb*s to
+ *		**PACKET_HOST** in a programmatic way instead of having to
+ *		recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ *		example.
+ *
+ *		Note that *type* only allows certain values. At this time, they
+ *		are:
+ *
+ *		**PACKET_HOST**
+ *			Packet is for us.
+ *		**PACKET_BROADCAST**
+ *			Send packet to all.
+ *		**PACKET_MULTICAST**
+ *			Send packet to group.
+ *		**PACKET_OTHERHOST**
+ *			Send packet to someone else.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ *	Description
+ *		Check whether *skb* is a descendant of the cgroup2 held by
+ *		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *	Return
+ *		The return value depends on the result of the test, and can be:
+ *
+ *		* 0, if the *skb* failed the cgroup2 descendant test.
+ *		* 1, if the *skb* succeeded the cgroup2 descendant test.
+ *		* A negative error code, if an error occurred.
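+ *
+ *		A sketch, with *cgroup_map* a hypothetical
+ *		**BPF_MAP_TYPE_CGROUP_ARRAY** map holding the cgroup to test
+ *		at index 0:
+ *
+ *		::
+ *
+ *			int ret = bpf_skb_under_cgroup(skb, &cgroup_map, 0);
+ *
+ *			if (ret < 0)
+ *				return TC_ACT_SHOT;	/* error */
+ *			return ret ? TC_ACT_OK : TC_ACT_SHOT;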
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ *	Description
+ *		Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ *		not set, in particular if the hash was cleared due to mangling,
+ *		recompute this hash. Later accesses to the hash can be done
+ *		directly with *skb*\ **->hash**.
+ *
+ *		Calling **bpf_set_hash_invalid**\ (), changing a packet
+ *		protocol with **bpf_skb_change_proto**\ (), or calling
+ *		**bpf_skb_store_bytes**\ () with the
+ *		**BPF_F_INVALIDATE_HASH** flag are actions susceptible to clear
+ *		the hash and to trigger a new computation for the next call to
+ *		**bpf_get_hash_recalc**\ ().
+ *	Return
+ *		The 32-bit hash.
 *
 * u64 bpf_get_current_task(void)
- *	Returns current task_struct
- *	Return: current
- *
- * int bpf_probe_write_user(void *dst, void *src, int len)
- *	safely attempt to write to a location
- *	@dst: destination address in userspace
- *	@src: source address on stack
- *	@len: number of bytes to copy
- *	Return: 0 on success or negative error
- *
- * int bpf_current_task_under_cgroup(map, index)
- *	Check cgroup2 membership of current task
- *	@map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *	@index: index of the cgroup in the bpf_map
- *	Return:
- *		== 0 current failed the cgroup2 descendant test
- *		== 1 current succeeded the cgroup2 descendant test
- *		 < 0 error
- *
- * int bpf_skb_change_tail(skb, len, flags)
- *	The helper will resize the skb to the given new size, to be used f.e.
- *	with control messages.
- *	@skb: pointer to skb
- *	@len: new skb length
- *	@flags: reserved
- *	Return: 0 on success or negative error
- *
- * int bpf_skb_pull_data(skb, len)
- *	The helper will pull in non-linear data in case the skb is non-linear
- *	and not all of len are part of the linear section. Only needed for
- *	read/write with direct packet access.
- *	@skb: pointer to skb
- *	@len: len to make read/writeable
- *	Return: 0 on success or negative error
- *
- * s64 bpf_csum_update(skb, csum)
- *	Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
- *	@skb: pointer to skb
- *	@csum: csum to add
- *	Return: csum on success or negative error
- *
- * void bpf_set_hash_invalid(skb)
- *	Invalidate current skb->hash.
- *	@skb: pointer to skb
- *
- * int bpf_get_numa_node_id()
- *	Return: Id of current NUMA node.
- *
- * int bpf_skb_change_head()
- *	Grows headroom of skb and adjusts MAC header offset accordingly.
- *	Will extends/reallocae as required automatically.
- *	May change skb data pointer and will thus invalidate any check
- *	performed for direct packet access.
- *	@skb: pointer to skb
- *	@len: length of header to be pushed in front
- *	@flags: Flags (unused for now)
- *	Return: 0 on success or negative error
- *
- * int bpf_xdp_adjust_head(xdp_md, delta)
- *	Adjust the xdp_md.data by delta
- *	@xdp_md: pointer to xdp_md
- *	@delta: An positive/negative integer to be added to xdp_md.data
- *	Return: 0 on success or negative on error
+ *	Return
+ *		A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ *	Description
+ *		Attempt in a safe way to write *len* bytes from the buffer
+ *		*src* to *dst* in memory. It only works for threads that are in
+ *		user context, and *dst* must be a valid user space address.
+ *
+ *		This helper should not be used to implement any kind of
+ *		security mechanism because of TOC-TOU attacks, but rather to
+ *		debug, divert, and manipulate execution of semi-cooperative
+ *		processes.
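+ *
+ *		A sketch from a kprobe on a system call, patching a buffer
+ *		passed in by the traced process (the **PT_REGS_PARM1**\ ()
+ *		accessor is assumed from the usual tracing headers, and the
+ *		layout of the target buffer is hypothetical):
+ *
+ *		::
+ *
+ *			char patch[] = "bpf";
+ *			void *user_buf = (void *)PT_REGS_PARM1(ctx);
+ *
+ *			bpf_probe_write_user(user_buf, patch, sizeof(patch));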
+ *
+ *		Keep in mind that this feature is meant for experiments, and it
+ *		has a risk of crashing the system and running programs.
+ *		Therefore, when an eBPF program using this helper is attached,
+ *		a warning including PID and process name is printed to kernel
+ *		logs.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ *	Description
+ *		Check whether the probe is being run in the context of a given
+ *		subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ *		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *	Return
+ *		The return value depends on the result of the test, and can be:
+ *
+ *		* 1, if current task belongs to the cgroup2.
+ *		* 0, if current task does not belong to the cgroup2.
+ *		* A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ *	Description
+ *		Resize (trim or grow) the packet associated to *skb* to the
+ *		new *len*. The *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		The basic idea is that the helper performs the needed work to
+ *		change the size of the packet, then the eBPF program rewrites
+ *		the rest via helpers like **bpf_skb_store_bytes**\ (),
+ *		**bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
+ *		and others. This helper is a slow path utility intended for
+ *		replies with control messages. Because it is targeted for slow
+ *		paths, the helper itself can afford to be slow: it implicitly
+ *		linearizes, unclones and drops offloads from the *skb*.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ *	Description
+ *		Pull in non-linear data in case the *skb* is non-linear and not
+ *		all of *len* are part of the linear section. Make *len* bytes
+ *		from *skb* readable and writable. If a zero value is passed for
+ *		*len*, then the whole length of the *skb* is pulled.
+ *
+ *		This helper is only needed for reading and writing with direct
+ *		packet access.
+ *
+ *		For direct packet access, testing that offsets to access
+ *		are within packet boundaries (test on *skb*\ **->data_end**)
+ *		may fail if offsets are invalid, or if the requested
+ *		data is in non-linear parts of the *skb*. On failure the
+ *		program can just bail out, or in the case of a non-linear
+ *		buffer, use a helper to make the data available. The
+ *		**bpf_skb_load_bytes**\ () helper is a first solution to access
+ *		the data. Another one consists in using **bpf_skb_pull_data**\
+ *		() to pull in the non-linear parts once, then retesting and
+ *		eventually accessing the data.
+ *
+ *		At the same time, this also makes sure the *skb* is uncloned,
+ *		which is a necessary condition for direct write. As this needs
+ *		to be an invariant for the write part only, the verifier
+ *		detects writes and adds a prologue that is calling
+ *		**bpf_skb_pull_data**\ () to effectively unclone the *skb* from
+ *		the very beginning in case it is indeed cloned.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ *	Description
+ *		Add the checksum *csum* into *skb*\ **->csum** in case the
+ *		driver has supplied a checksum for the entire packet into that
+ *		field. Return an error otherwise. This helper is intended to be
+ *		used in combination with **bpf_csum_diff**\ (), in particular
+ *		when the checksum needs to be updated after data has been
+ *		written into the packet through direct packet access.
+ *	Return
+ *		The checksum on success, or a negative error code in case of
+ *		failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ *	Description
+ *		Invalidate the current *skb*\ **->hash**. It can be used after
+ *		mangling on headers through direct packet access, in order to
+ *		indicate that the hash is outdated and to trigger a
+ *		recalculation the next time the kernel tries to access this
+ *		hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ *	Description
+ *		Return the id of the current NUMA node. The primary use case
+ *		for this helper is the selection of sockets for the local NUMA
+ *		node, when the program is attached to sockets using the
+ *		**SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ *		but the helper is also available to other eBPF program types,
+ *		similarly to **bpf_get_smp_processor_id**\ ().
+ *	Return
+ *		The id of the current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ *	Description
+ *		Grow the headroom of the packet associated to *skb* and adjust
+ *		the offset of the MAC header accordingly, adding *len* bytes of
+ *		space. It automatically extends and reallocates memory as
+ *		required.
+ *
+ *		This helper can be used on a layer 3 *skb* to push a MAC header
+ *		for redirection into a layer 2 device.
+ *
+ *		All values for *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ *	Description
+ *		Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ *		it is possible to use a negative value for *delta*. This helper
+ *		can be used to prepare the packet for pushing or popping
+ *		headers.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
 *
 * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
- *	Copy a NUL terminated string from unsafe address. In case the string
- *	length is smaller than size, the target is not padded with further NUL
- *	bytes. In case the string length is larger than size, just count-1
- *	bytes are copied and the last byte is set to NUL.
- *	@dst: destination address
- *	@size: maximum number of bytes to copy, including the trailing NUL
- *	@unsafe_ptr: unsafe address
- *	Return:
- *		> 0 length of the string including the trailing NUL on success
- *		< 0 error
- *
- * u64 bpf_get_socket_cookie(skb)
- *	Get the cookie for the socket stored inside sk_buff.
- *	@skb: pointer to skb
- *	Return: 8 Bytes non-decreasing number on success or 0 if the socket
- *	field is missing inside sk_buff
- *
- * u32 bpf_get_socket_uid(skb)
- *	Get the owner uid of the socket stored inside sk_buff.
- *	@skb: pointer to skb
- *	Return: uid of the socket owner on success or overflowuid if failed.
- *
- * u32 bpf_set_hash(skb, hash)
- *	Set full skb->hash.
- *	@skb: pointer to skb
- *	@hash: hash to set
- *
- * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
- *	Calls setsockopt. Not all opts are available, only those with
- *	integer optvals plus TCP_CONGESTION.
- *	Supported levels: SOL_SOCKET and IPPROTO_TCP
- *	@bpf_socket: pointer to bpf_socket
- *	@level: SOL_SOCKET or IPPROTO_TCP
- *	@optname: option name
- *	@optval: pointer to option value
- *	@optlen: length of optval in bytes
- *	Return: 0 or negative error
- *
- * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
- *	Calls getsockopt. Not all opts are available.
- *	Supported levels: IPPROTO_TCP
- *	@bpf_socket: pointer to bpf_socket
- *	@level: IPPROTO_TCP
- *	@optname: option name
- *	@optval: pointer to option value
- *	@optlen: length of optval in bytes
- *	Return: 0 or negative error
- *
- * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
- *	Set callback flags for sock_ops
- *	@bpf_sock_ops: pointer to bpf_sock_ops_kern struct
- *	@flags: flags value
- *	Return: 0 for no error
- *		-EINVAL if there is no full tcp socket
- *		bits in flags that are not supported by current kernel
- *
- * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
- *	Grow or shrink room in sk_buff.
- *	@skb: pointer to skb
- *	@len_diff: (signed) amount of room to grow/shrink
- *	@mode: operation mode (enum bpf_adj_room_mode)
- *	@flags: reserved for future use
- *	Return: 0 on success or negative error code
- *
- * int bpf_sk_redirect_map(map, key, flags)
- *	Redirect skb to a sock in map using key as a lookup key for the
- *	sock in map.
- *	@map: pointer to sockmap
- *	@key: key to lookup sock in map
- *	@flags: reserved for future use
- *	Return: SK_PASS
- *
- * int bpf_sock_map_update(skops, map, key, flags)
- *	@skops: pointer to bpf_sock_ops
- *	@map: pointer to sockmap to update
- *	@key: key to insert/update sock in map
- *	@flags: same flags as map update elem
- *
- * int bpf_xdp_adjust_meta(xdp_md, delta)
- *	Adjust the xdp_md.data_meta by delta
- *	@xdp_md: pointer to xdp_md
- *	@delta: An positive/negative integer to be added to xdp_md.data_meta
- *	Return: 0 on success or negative on error
- *
- * int bpf_perf_event_read_value(map, flags, buf, buf_size)
- *	read perf event counter value and perf event enabled/running time
- *	@map: pointer to perf_event_array map
- *	@flags: index of event in the map or bitmask flags
- *	@buf: buf to fill
- *	@buf_size: size of the buf
- *	Return: 0 on success or negative error code
- *
- * int bpf_perf_prog_read_value(ctx, buf, buf_size)
- *	read perf prog attached perf event counter and enabled/running time
- *	@ctx: pointer to ctx
- *	@buf: buf to fill
- *	@buf_size: size of the buf
- *	Return : 0 on success or negative error code
- *
- * int bpf_override_return(pt_regs, rc)
- *	@pt_regs: pointer to struct pt_regs
- *	@rc: the return value to set
- *
- * int bpf_msg_redirect_map(map, key, flags)
- *	Redirect msg to a sock in map using key as a lookup key for the
- *	sock in map.
- *	@map: pointer to sockmap
- *	@key: key to lookup sock in map
- *	@flags: reserved for future use
- *	Return: SK_PASS
- *
- * int bpf_bind(ctx, addr, addr_len)
- *	Bind socket to address. Only binding to IP is supported, no port can be
- *	set in addr.
- *	@ctx: pointer to context of type bpf_sock_addr
- *	@addr: pointer to struct sockaddr to bind socket to
- *	@addr_len: length of sockaddr structure
- *	Return: 0 on success or negative error code
+ *	Description
+ *		Copy a NUL-terminated string from an unsafe address
+ *		*unsafe_ptr* to *dst*. The *size* should include the
+ *		terminating NUL byte. In case the string length is smaller than
+ *		*size*, the target is not padded with further NUL bytes. If the
+ *		string length is larger than *size*, just *size*-1 bytes are
+ *		copied and the last byte is set to NUL.
+ *
+ *		On success, the length of the copied string is returned. This
+ *		makes this helper useful in tracing programs for reading
+ *		strings, and more importantly to get its length at runtime. See
+ *		the following snippet:
+ *
+ *		::
+ *
+ *			SEC("kprobe/sys_open")
+ *			void bpf_sys_open(struct pt_regs *ctx)
+ *			{
+ *				char buf[PATHLEN]; // PATHLEN is defined to 256
+ *				int res = bpf_probe_read_str(buf, sizeof(buf),
+ *				                             ctx->di);
+ *
+ *				// Consume buf, for example push it to
+ *				// userspace via bpf_perf_event_output(); we
+ *				// can use res (the string length) as event
+ *				// size, after checking its boundaries.
+ *			}
+ *
+ *		In comparison, using the **bpf_probe_read**\ () helper here
+ *		instead to read the string would require estimating the length
+ *		at compile time, and would often result in copying more memory
+ *		than necessary.
+ *
+ *		Another useful use case is when parsing individual process
+ *		arguments or individual environment variables, navigating
+ *		*current*\ **->mm->arg_start** and *current*\
+ *		**->mm->env_start**: using this helper and the return value,
+ *		one can quickly iterate at the right offset of the memory area.
+ *	Return
+ *		On success, the strictly positive length of the string,
+ *		including the trailing NUL character. On error, a negative
+ *		value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ *	Description
+ *		If the **struct sk_buff** pointed by *skb* has a known socket,
+ *		retrieve the cookie (generated by the kernel) of this socket.
+ *		If no cookie has been set yet, generate a new cookie. Once
+ *		generated, the socket cookie remains stable for the life of the
+ *		socket. This helper can be useful for monitoring per socket
+ *		networking traffic statistics as it provides a unique socket
+ *		identifier per namespace.
+ *	Return
+ *		An 8-byte long non-decreasing number on success, or 0 if the
+ *		socket field is missing inside *skb*.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ *	Return
+ *		The owner UID of the socket associated to *skb*. If the socket
+ *		is **NULL**, or if it is not a full socket (i.e. if it is a
+ *		time-wait or a request socket instead), **overflowuid** value
+ *		is returned (note that **overflowuid** might also be the actual
+ *		UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ *	Description
+ *		Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ *		to value *hash*.
+ *	Return
+ *		0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ *	Description
+ *		Emulate a call to **setsockopt()** on the socket associated to
+ *		*bpf_socket*, which must be a full socket. The *level* at
+ *		which the option resides and the name *optname* of the option
+ *		must be specified, see **setsockopt(2)** for more information.
+ *		The option value of length *optlen* is pointed by *optval*.
+ *
+ *		This helper actually implements a subset of **setsockopt()**.
+ *		It supports the following *level*\ s:
+ *
+ *		* **SOL_SOCKET**, which supports the following *optname*\ s:
+ *		  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ *		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ *		* **IPPROTO_TCP**, which supports the following *optname*\ s:
+ *		  **TCP_CONGESTION**, **TCP_BPF_IW**,
+ *		  **TCP_BPF_SNDCWND_CLAMP**.
+ *		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ *		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ *	Description
+ *		Grow or shrink the room for data in the packet associated to
+ *		*skb* by *len_diff*, and according to the selected *mode*.
+ *
+ *		There is a single supported mode at this time:
+ *
+ *		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ *		  (room space is added or removed below the layer 3 header).
+ *
+ *		All values for *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ *	Description
+ *		Redirect the packet to the endpoint referenced by *map* at
+ *		index *key*. Depending on its type, this *map* can contain
+ *		references to net devices (for forwarding packets through other
+ *		ports), or to CPUs (for redirecting XDP frames to another CPU;
+ *		but this is only implemented for native XDP (with driver
+ *		support) as of this writing).
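+ *
+ *		A minimal XDP sketch, with *tx_ports* a hypothetical
+ *		**BPF_MAP_TYPE_DEVMAP** map of egress devices:
+ *
+ *		::
+ *
+ *			u32 port = 0;	/* devmap index, chosen by policy */
+ *
+ *			return bpf_redirect_map(&tx_ports, port, 0);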
+ *
+ *		All values for *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		When used to redirect packets to net devices, this helper
+ *		provides a large performance increase over **bpf_redirect**\
+ *		(). This is due to various implementation details of the
+ *		underlying mechanisms, one of which is the fact that
+ *		**bpf_redirect_map**\ () tries to send packets in bulk to the
+ *		device.
+ *	Return
+ *		**XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ *	Description
+ *		Redirect the packet to the socket referenced by *map* (of type
+ *		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ *		egress interfaces can be used for redirection. The
+ *		**BPF_F_INGRESS** value in *flags* is used to make the
+ *		distinction (ingress path is selected if the flag is present,
+ *		egress path otherwise). This is the only flag supported for
+ *		now.
+ *	Return
+ *		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		Add an entry to, or update a *map* referencing sockets. The
+ *		*skops* is used as a new value for the entry associated to
+ *		*key*. *flags* is one of:
+ *
+ *		**BPF_NOEXIST**
+ *			The entry for *key* must not exist in the map.
+ *		**BPF_EXIST**
+ *			The entry for *key* must already exist in the map.
+ *		**BPF_ANY**
+ *			No condition on the existence of the entry for *key*.
+ *
+ *		If the *map* has eBPF programs (parser and verdict), those will
+ *		be inherited by the socket being added. If the socket is
+ *		already attached to eBPF programs, this results in an error.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ *	Description
+ *		Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ *		*delta* (which can be positive or negative). Note that this
+ *		operation modifies the address stored in *xdp_md*\ **->data**,
+ *		so the latter must be loaded only after the helper has been
+ *		called.
+ *
+ *		The use of *xdp_md*\ **->data_meta** is optional and programs
+ *		are not required to use it. The rationale is that when the
+ *		packet is processed with XDP (e.g. as DoS filter), it is
+ *		possible to push further meta data along with it before passing
+ *		to the stack, and to give the guarantee that an ingress eBPF
+ *		program attached as a TC classifier on the same device can pick
+ *		this up for further post-processing. Since TC works with socket
+ *		buffers, it remains possible to set from XDP the **mark** or
+ *		**priority** fields, or other fields of the socket buffer.
+ *		Having this scratch space generic and programmable allows for
+ *		more flexibility as the user is free to store whatever meta
+ *		data they need.
+ *
+ *		A call to this helper may change the underlying packet
+ *		buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ *	Description
+ *		Read the value of a perf event counter, and store it into *buf*
+ *		of size *buf_size*. This helper relies on a *map* of type
+ *		**BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ *		counter is selected when *map* is updated with perf event file
+ *		descriptors. The *map* is an array whose size is the number of
+ *		available CPUs, and each cell contains a value relative to one
+ *		CPU. The value to retrieve is indicated by *flags*, that
+ *		contains the index of the CPU to look up, masked with
+ *		**BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ *		**BPF_F_CURRENT_CPU** to indicate that the value for the
+ *		current CPU should be retrieved.
+ *
+ *		This helper behaves in a way close to
+ *		**bpf_perf_event_read**\ () helper, save that instead of
+ *		just returning the value observed, it fills the *buf*
+ *		structure. This allows for additional data to be retrieved: in
+ *		particular, the enabled and running times (in *buf*\
+ *		**->enabled** and *buf*\ **->running**, respectively) are
+ *		copied. In general, **bpf_perf_event_read_value**\ () is
+ *		recommended over **bpf_perf_event_read**\ (), which has some
+ *		ABI issues and provides fewer functionalities.
+ *
+ *		These values are interesting, because hardware PMU (Performance
+ *		Monitoring Unit) counters are limited resources. When there are
+ *		more PMU based perf events opened than available counters,
+ *		kernel will multiplex these events so each event gets certain
+ *		percentage (but not all) of the PMU time. When multiplexing
+ *		happens, the number of samples or the counter value will not
+ *		reflect what would be observed without multiplexing. This makes
+ *		comparison between different runs difficult.
+ *		Typically, the counter value should be normalized before
+ *		comparing to other experiments. The usual normalization is done
+ *		as follows.
+ *
+ *		::
+ *
+ *			normalized_counter = counter * t_enabled / t_running
+ *
+ *		where *t_enabled* is the time enabled for the event and
+ *		*t_running* is the time running for the event since the last
+ *		normalization. The enabled and running times are accumulated
+ *		since the perf event open. To achieve scaling factor between
+ *		two invocations of an eBPF program, users can use the CPU id
+ *		as the key (which is typical for perf array usage model) to
+ *		remember the previous value and do the calculation inside the
+ *		eBPF program.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ *	Description
+ *		For an eBPF program attached to a perf event, retrieve the
+ *		value of the event counter associated to *ctx* and store it in
+ *		the structure pointed by *buf* and of size *buf_size*. Enabled
+ *		and running times are also stored in the structure (see
+ *		description of helper **bpf_perf_event_read_value**\ () for
+ *		more details).
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ *	Description
+ *		Emulate a call to **getsockopt()** on the socket associated to
+ *		*bpf_socket*, which must be a full socket. The *level* at
+ *		which the option resides and the name *optname* of the option
+ *		must be specified, see **getsockopt(2)** for more information.
+ *		The retrieved value is stored in the structure pointed to by
+ *		*optval* and of length *optlen*.
+ *
+ *		This helper actually implements a subset of **getsockopt()**.
+ *		It supports the following *level*\ s:
+ *
+ *		* **IPPROTO_TCP**, which supports *optname*
+ *		  **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * Description
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the **CONFIG_FUNCTION_ERROR_INJECTION** option. As of this
+ * writing, the x86 architecture is the only one to support this
+ * feature.
+ * Return
+ * 0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval)
+ * Description
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated with *bpf_sock* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * The supported callback values that *argval* can combine are:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * Here are some examples of where one could call such an eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ * Return
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
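+ *
+ * A minimal usage sketch for a verdict program (illustrative
+ * only: the *sock_map* map name and the libbpf-style **SEC**
+ * annotation are assumptions, not part of this API), redirecting
+ * everything to the socket at index 0 on the egress path:
+ *
+ * ::
+ *
+ * SEC("sk_msg")
+ * int msg_verdict(struct sk_msg_md *msg)
+ * {
+ * return bpf_msg_redirect_map(msg, &sock_map, 0, 0);
+ * }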
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only needs to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, which is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the start-of-data pointer pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* bytes
+ * have been consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ * Return
+ * 0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* bytes have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program from being called again until *bytes*
+ * bytes have been accumulated.
+ * Return
+ * 0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * Description
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* byte offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg*, it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) the eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointers to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
+ * Description
+ * Bind the socket associated with *ctx* to the address pointed to
+ * by *addr*, of length *addr_len*. This allows for making
+ * outgoing connections from the desired IP address, which can be
+ * useful for example when all processes inside a cgroup should
+ * use one single IP address on a host that has multiple IP
+ * addresses configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to a port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * Description
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in the XFRM "security path" for
+ * *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed to by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * the **CONFIG_XFRM** configuration option.
+ * Return
+ * 0 on success, or a negative error in case of failure.
 */
#define __BPF_FUNC_MAPPER(FN) \
 FN(unspec), \
@@ -821,7 +1833,9 @@ union bpf_attr {
 FN(msg_apply_bytes), \
 FN(msg_cork_bytes), \
 FN(msg_pull_data), \
- FN(bind),
+ FN(bind), \
+ FN(xdp_adjust_tail), \
+ FN(skb_get_xfrm_state),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
@@ -927,6 +1941,19 @@ struct bpf_tunnel_key {
 __u32 tunnel_label;
};
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+ __u32 reqid;
+ __u32 spi; /* Stored in network byte order */
+ __u16 family;
+ union {
+ __u32 remote_ipv4; /* Stored in network byte order */
+ __u32 remote_ipv6[4]; /* Stored in network byte order */
+ };
+};
+
/* Generic BPF return codes which all BPF program types may support.
* The values are binary compatible with their TC_ACT_* counter-part to
 * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -1017,6 +2044,7 @@ struct bpf_prog_info {
 __aligned_u64 map_ids;
 char name[BPF_OBJ_NAME_LEN];
 __u32 ifindex;
+ __u32 gpl_compatible:1;
 __u64 netns_dev;
 __u64 netns_ino;
} __attribute__((aligned(8)));
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
new file mode 100644
index 000000000000..bcb56ee47014
--- /dev/null
+++ b/include/uapi/linux/btf.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC 0xeB9F
+#define BTF_VERSION 1
+
+struct btf_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+
+ __u32 parent_label;
+ __u32 parent_name;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 label_off; /* offset of label section */
+ __u32 object_off; /* offset of data object section */
+ __u32 func_off; /* offset of function section */
+ __u32 type_off; /* offset of type section */
+ __u32 str_off; /* offset of string section */
+ __u32 str_len; /* length of string section */
+};
+
+/* Max # of type identifiers */
+#define BTF_MAX_TYPE 0x7fffffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET 0x7fffffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN 0xffff
+
+/* The type id refers to a parent BTF */
+#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
+#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
+
+/* String is in the ELF string section */
+#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
+#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
+
+struct btf_type {
+ __u32 name_off;
+ /* "info" bits arrangement
+ * bits 0-15: vlen (e.g. # of struct's members)
+ * bits 16-23: unused
+ * bits 24-28: kind (e.g. int, ptr, array...etc)
+ * bits 29-30: unused
+ * bit 31: root
+ */
+ __u32 info;
+ /* "size" is used by INT, ENUM, STRUCT and UNION.
+ * "size" tells the size of the type it is describing.
+ *
+ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+ * "type" is a type_id referring to another type.
+ */
+ union {
+ __u32 size;
+ __u32 type;
+ };
+};
+
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
+#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
+#define BTF_INFO_VLEN(info) ((info) & 0xffff)
+
+#define BTF_KIND_UNKN 0 /* Unknown */
+#define BTF_KIND_INT 1 /* Integer */
+#define BTF_KIND_PTR 2 /* Pointer */
+#define BTF_KIND_ARRAY 3 /* Array */
+#define BTF_KIND_STRUCT 4 /* Struct */
+#define BTF_KIND_UNION 5 /* Union */
+#define BTF_KIND_ENUM 6 /* Enumeration */
+#define BTF_KIND_FWD 7 /* Forward */
+#define BTF_KIND_TYPEDEF 8 /* Typedef */
+#define BTF_KIND_VOLATILE 9 /* Volatile */
+#define BTF_KIND_CONST 10 /* Const */
+#define BTF_KIND_RESTRICT 11 /* Restrict */
+#define BTF_KIND_MAX 11
+#define NR_BTF_KINDS 12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
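+ *
+ * For example (illustrative), a plain "unsigned int" is encoded as
+ * a BTF_KIND_INT btf_type with size 4, followed by a single u32 in
+ * which BTF_INT_BITS() is 32, and BTF_INT_OFFSET() and
+ * BTF_INT_ENCODING() are both 0.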
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32 bits arrangement:
+ */
+#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
+#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16)
+#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED 0x1
+#define BTF_INT_CHAR 0x2
+#define BTF_INT_BOOL 0x4
+#define BTF_INT_VARARGS 0x8
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+ __u32 name_off;
+ __s32 val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+ __u32 type;
+ __u32 index_type;
+ __u32 nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member". The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+ __u32 name_off;
+ __u32 type;
+ __u32 offset; /* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h
index 68ff25414700..db210625cee8 100644
--- a/include/uapi/linux/cn_proc.h
+++ b/include/uapi/linux/cn_proc.h
@@ -116,12 +116,16 @@ struct proc_event {
 struct coredump_proc_event {
 __kernel_pid_t process_pid;
 __kernel_pid_t process_tgid;
+ __kernel_pid_t parent_pid;
+ __kernel_pid_t parent_tgid;
 } coredump;
 
 struct exit_proc_event {
 __kernel_pid_t process_pid;
 __kernel_pid_t process_tgid;
 __u32 exit_code, exit_signal;
+ __kernel_pid_t parent_pid;
+ __kernel_pid_t parent_tgid;
 } exit;
 } event_data;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 68699f654118..b85266420bfb 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -516,6 +516,7 @@ enum {
 IFLA_VXLAN_COLLECT_METADATA,
 IFLA_VXLAN_LABEL,
 IFLA_VXLAN_GPE,
+ IFLA_VXLAN_TTL_INHERIT,
 __IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 103ba797a8f3..83ade9b5cf95 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -506,6 +506,8 @@
 #define PCI_EXP_DEVCTL_READRQ_256B 0x1000 /* 256 Bytes */
 #define PCI_EXP_DEVCTL_READRQ_512B 0x2000 /* 512 Bytes */
 #define PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */
+#define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */
+#define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */
 #define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */
 #define PCI_EXP_DEVSTA 10 /* Device Status */
 #define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 33a70ece462f..d02e859301ff 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -276,6 +276,8 @@ enum
 LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */
 LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */
 LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */
+ LINUX_MIB_TCPDELIVERED, /* TCPDelivered */
+ LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */
 __LINUX_MIB_MAX
};
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 560374c978f9..29eb659aa77a 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -122,6 +122,10 @@ enum {
 #define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */
 #define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */
 #define TCP_FASTOPEN_NO_COOKIE 34 /* Enable
TFO without a TFO cookie */ +#define TCP_ZEROCOPY_RECEIVE 35 +#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ + +#define TCP_CM_INQ TCP_INQ struct tcp_repair_opt { __u32 opt_code; @@ -224,6 +228,9 @@ struct tcp_info { __u64 tcpi_busy_time; /* Time (usec) busy sending data */ __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ + + __u32 tcpi_delivered; + __u32 tcpi_delivered_ce; }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ @@ -244,6 +251,8 @@ enum { TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ TCP_NLA_CA_STATE, /* ca_state of socket */ TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */ + TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */ + TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */ }; @@ -271,4 +280,11 @@ struct tcp_diag_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; }; +/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */ + +struct tcp_zerocopy_receive { + __u64 address; /* in: address of mapping */ + __u32 length; /* in/out: number of bytes to map/mapped */ + __u32 recv_skip_hint; /* out: amount of bytes to skip */ +}; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index bf6d28677cfe..6b2fd4d9655f 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -209,16 +209,16 @@ struct tipc_group_req { * The string formatting for each name element is: * media: media * interface: media:interface name - * link: Z.C.N:interface-Z.C.N:interface - * + * link: node:interface-node:interface */ - +#define TIPC_NODEID_LEN 16 #define TIPC_MAX_MEDIA_NAME 16 #define TIPC_MAX_IF_NAME 16 #define TIPC_MAX_BEARER_NAME 32 #define TIPC_MAX_LINK_NAME 68 -#define SIOCGETLINKNAME SIOCPROTOPRIVATE +#define SIOCGETLINKNAME SIOCPROTOPRIVATE +#define SIOCGETNODEID (SIOCPROTOPRIVATE + 1) struct tipc_sioc_ln_req { __u32 peer; @@ -226,6 +226,10 @@ struct tipc_sioc_ln_req { char linkname[TIPC_MAX_LINK_NAME]; }; +struct tipc_sioc_nodeid_req { + __u32 peer; + char node_id[TIPC_NODEID_LEN]; +}; /* The macros and functions below are deprecated: */ diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 3f29e3c8ed06..4b2c93b1934c 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -185,6 +185,11 @@ #define TIPC_DEF_LINK_WIN 50 #define TIPC_MAX_LINK_WIN 8191 +/* + * Default MTU for UDP media + */ + +#define TIPC_DEF_LINK_UDP_MTU 14000 struct tipc_node_info { __be32 addr; /* network address of node */ diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0affb682e5e3..85c11982c89b 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -266,6 +266,7 @@ enum { TIPC_NLA_PROP_PRIO, /* u32 */ TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ + TIPC_NLA_PROP_MTU, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index efb7b5991c2f..09d00f8c442b 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -32,6 +32,7 @@ struct udphdr { #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ +#define UDP_SEGMENT 103 /* Set GSO segmentation size */ /* UDP encapsulation types */ #define 
UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index a713fd23ec88..35c485fa9ea3 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -4,6 +4,7 @@ obj-y := core.o
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
+obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 027107f4be53..0fd8d8f1a398 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -11,11 +11,13 @@
 * General Public License for more details.
 */
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/filter.h>
 #include <linux/perf_event.h>
+#include <uapi/linux/btf.h>
 
 #include "map_in_map.h"
 
@@ -336,6 +338,52 @@ static void array_map_free(struct bpf_map *map)
 bpf_map_area_free(array);
 }
 
+static void array_map_seq_show_elem(struct bpf_map *map, void *key,
+ struct seq_file *m)
+{
+ void *value;
+
+ rcu_read_lock();
+
+ value = array_map_lookup_elem(map, key);
+ if (!value) {
+ rcu_read_unlock();
+ return;
+ }
+
+ seq_printf(m, "%u: ", *(u32 *)key);
+ btf_type_seq_show(map->btf, map->btf_value_id, value, m);
+ seq_puts(m, "\n");
+
+ rcu_read_unlock();
+}
+
+static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
+ u32 btf_key_id, u32 btf_value_id)
+{
+ const struct btf_type *key_type, *value_type;
+ u32 key_size, value_size;
+ u32 int_data;
+
+ key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+ if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+ return -EINVAL;
+
+ int_data = *(u32 *)(key_type + 1);
+ /* bpf array can only take a u32 key. This check makes
+ * sure that the btf matches the attr used during map_create.
+ */
+ if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
+ BTF_INT_OFFSET(int_data))
+ return -EINVAL;
+
+ value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
+ if (!value_type || value_size > map->value_size)
+ return -EINVAL;
+
+ return 0;
+}
+
 const struct bpf_map_ops array_map_ops = {
 .map_alloc_check = array_map_alloc_check,
 .map_alloc = array_map_alloc,
@@ -345,6 +393,8 @@ const struct bpf_map_ops array_map_ops = {
 .map_update_elem = array_map_update_elem,
 .map_delete_elem = array_map_delete_elem,
 .map_gen_lookup = array_map_gen_lookup,
+ .map_seq_show_elem = array_map_seq_show_elem,
+ .map_check_btf = array_map_check_btf,
};
 
 const struct bpf_map_ops percpu_array_map_ops = {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
new file mode 100644
index 000000000000..22e1046a1a86
--- /dev/null
+++ b/kernel/bpf/btf.c
@@ -0,0 +1,2064 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <uapi/linux/btf.h>
+#include <uapi/linux/types.h>
+#include <linux/seq_file.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/bpf_verifier.h>
+#include <linux/btf.h>
+
+/* BTF (BPF Type Format) is the metadata format which describes
+ * the data types of BPF programs/maps. Hence, it basically
+ * focuses on the C programming language, which modern BPF is
+ * primarily using.
+ *
+ * ELF Section:
+ * ~~~~~~~~~~~
+ * The BTF data is stored under the ".BTF" ELF section.
+ *
+ * struct btf_type:
+ * ~~~~~~~~~~~~~~~
+ * Each 'struct btf_type' object describes a C data type.
+ * Depending on the type it is describing, a 'struct btf_type'
+ * object may be followed by more data, e.g. to describe an
+ * array, 'struct btf_type' is followed by 'struct btf_array'.
+ *
+ * 'struct btf_type' and any extra data following it are
+ * 4 bytes aligned.
+ *
+ * Type section:
+ * ~~~~~~~~~~~~~
+ * The BTF type section contains a list of 'struct btf_type' objects.
+ * Each one describes a C type. Recall from the above section
+ * that a 'struct btf_type' object could be immediately followed by extra
+ * data in order to describe some particular C types.
+ *
+ * type_id:
+ * ~~~~~~~
+ * Each btf_type object is identified by a type_id. The type_id
+ * is implied by the location of the btf_type object in
+ * the BTF type section. The first one has type_id 1, the second
+ * one has type_id 2, and so on. Hence, an earlier btf_type has
+ * a smaller type_id.
+ *
+ * A btf_type object may refer to another btf_type object by using
+ * a type_id (i.e. the "type" in the "struct btf_type").
+ *
+ * NOTE that we cannot assume any reference-order.
+ * A btf_type object can refer to an earlier btf_type object
+ * but it can also refer to a later btf_type object.
+ *
+ * For example, to describe "const void *", a btf_type
+ * object describing "const" may refer to another btf_type
+ * object describing "void *". This type-reference is done
+ * by specifying a type_id:
+ *
+ * [1] CONST (anon) type_id=2
+ * [2] PTR (anon) type_id=0
+ *
+ * The above is the btf_verifier debug log:
+ * - Each line starting with "[?]" is a btf_type object
+ * - [?] is the type_id of the btf_type object.
+ * - CONST/PTR is the BTF_KIND_XXX
+ * - "(anon)" is the name of the type. It just
+ * happens that CONST and PTR have no name.
+ * - type_id=XXX is the 'u32 type' in btf_type
+ *
+ * NOTE: "void" has type_id 0
+ *
+ * String section:
+ * ~~~~~~~~~~~~~~
+ * The BTF string section contains the names used by the type section.
+ * Each string is referred to by an "offset" from the beginning of the
+ * string section.
+ *
+ * Each string is '\0' terminated.
+ *
+ * The first character in the string section must be '\0',
+ * which is used to mean 'anonymous'. Some btf_type may not
+ * have a name.
+ */
+
+/* BTF verification:
+ *
+ * To verify BTF data, two passes are needed.
+ *
+ * Pass #1
+ * ~~~~~~~
+ * The first pass is to collect all btf_type objects into
+ * an array: "btf->types".
+ *
+ * Depending on the C type that a btf_type is describing,
+ * a btf_type may be followed by extra data. We don't know
+ * how many btf_type objects there are, and more importantly we
+ * don't know where each btf_type is located in the type section.
+ *
+ * Without knowing the location of each type_id, most verifications
+ * cannot be done, e.g. an earlier btf_type may refer to a later
+ * btf_type (recall the "const void *" above), so we cannot
+ * check this type-reference in the first pass.
+ *
+ * The first pass still does some verification (e.g.
+ * checking that the name is a valid offset into the string section).
+ *
+ * Pass #2
+ * ~~~~~~~
+ * The main focus is to resolve a btf_type that is referring
+ * to another type.
+ *
+ * We have to ensure the referring type:
+ * 1) does exist in the BTF (i.e.
in btf->types[])
+ * 2) does not cause a loop:
+ * struct A {
+ * struct B b;
+ * };
+ *
+ * struct B {
+ * struct A a;
+ * };
+ *
+ * btf_type_needs_resolve() decides if a btf_type needs
+ * to be resolved.
+ *
+ * The needs_resolve type implements the "resolve()" ops which
+ * essentially does a DFS and detects backedges.
+ *
+ * During resolve (or DFS), different C types have different
+ * "RESOLVED" conditions.
+ *
+ * When resolving a BTF_KIND_STRUCT, we need to resolve all its
+ * members because a member is always referring to another
+ * type. A struct's member can be treated as "RESOLVED" if
+ * it is referring to a BTF_KIND_PTR. Otherwise, the
+ * following valid C struct would be rejected:
+ *
+ * struct A {
+ * int m;
+ * struct A *a;
+ * };
+ *
+ * When resolving a BTF_KIND_PTR, it needs to keep resolving if
+ * it is referring to another BTF_KIND_PTR. Otherwise, we cannot
+ * detect a pointer loop, e.g.:
+ * BTF_KIND_CONST -> BTF_KIND_PTR -> BTF_KIND_CONST -> BTF_KIND_PTR +
+ * ^ |
+ * +-----------------------------------------+
+ *
+ */
+
+#define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE)
+#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
+#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
+#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
+#define BITS_ROUNDUP_BYTES(bits) \
+ (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
+
+/* 16MB is enough for 64k structs with 16 members each,
+ * plus a few MB of space for the string section.
+ * The hard limit is S32_MAX.
+ */
+#define BTF_MAX_SIZE (16 * 1024 * 1024)
+/* 64k. We can raise it later. The hard limit is S32_MAX. */
+#define BTF_MAX_NR_TYPES 65535
+
+#define for_each_member(i, struct_type, member) \
+ for (i = 0, member = btf_type_member(struct_type); \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
+#define for_each_member_from(i, from, struct_type, member) \
+ for (i = from, member = btf_type_member(struct_type) + from; \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
+struct btf {
+ union {
+ struct btf_header *hdr;
+ void *data;
+ };
+ struct btf_type **types;
+ u32 *resolved_ids;
+ u32 *resolved_sizes;
+ const char *strings;
+ void *nohdr_data;
+ u32 nr_types;
+ u32 types_size;
+ u32 data_size;
+ refcount_t refcnt;
+};
+
+enum verifier_phase {
+ CHECK_META,
+ CHECK_TYPE,
+};
+
+struct resolve_vertex {
+ const struct btf_type *t;
+ u32 type_id;
+ u16 next_member;
+};
+
+enum visit_state {
+ NOT_VISITED,
+ VISITED,
+ RESOLVED,
+};
+
+enum resolve_mode {
+ RESOLVE_TBD, /* To Be Determined */
+ RESOLVE_PTR, /* Resolving for Pointer */
+ RESOLVE_STRUCT_OR_ARRAY, /* Resolving for struct/union
+ * or array
+ */
+};
+
+#define MAX_RESOLVE_DEPTH 32
+
+struct btf_verifier_env {
+ struct btf *btf;
+ u8 *visit_states;
+ struct resolve_vertex stack[MAX_RESOLVE_DEPTH];
+ struct bpf_verifier_log log;
+ u32 log_type_id;
+ u32 top_stack;
+ enum verifier_phase phase;
+ enum resolve_mode resolve_mode;
+};
+
+static const char * const btf_kind_str[NR_BTF_KINDS] = {
+ [BTF_KIND_UNKN] = "UNKNOWN",
+ [BTF_KIND_INT] = "INT",
+ [BTF_KIND_PTR] = "PTR",
+ [BTF_KIND_ARRAY] = "ARRAY",
+ [BTF_KIND_STRUCT] = "STRUCT",
+ [BTF_KIND_UNION] = "UNION",
+ [BTF_KIND_ENUM] = "ENUM",
+ [BTF_KIND_FWD] = "FWD",
+ [BTF_KIND_TYPEDEF] = "TYPEDEF",
+ [BTF_KIND_VOLATILE] = "VOLATILE",
+ [BTF_KIND_CONST] = "CONST",
+ [BTF_KIND_RESTRICT] = "RESTRICT",
+};
+
+struct btf_kind_operations {
+ s32 (*check_meta)(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left);
+ int (*resolve)(struct btf_verifier_env *env,
+ const
struct resolve_vertex *v);
+ int (*check_member)(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const struct btf_type *member_type);
+ void (*log_details)(struct btf_verifier_env *env,
+ const struct btf_type *t);
+ void (*seq_show)(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offsets,
+ struct seq_file *m);
+};
+
+static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
+static struct btf_type btf_void;
+
+static bool btf_type_is_modifier(const struct btf_type *t)
+{
+ /* Some of them are not strictly C modifiers,
+ * but they are grouped into the same bucket
+ * as far as BTF is concerned:
+ * A type (t) that refers to another
+ * type through t->type AND whose size cannot
+ * be determined without following the t->type.
+ *
+ * ptr does not fall into this bucket
+ * because its size is always sizeof(void *).
+ */
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ return true;
+ }
+
+ return false;
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+ /* void => no type and size info.
+ * Hence, FWD is also treated as void.
+ */
+ return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+ return !t || btf_type_is_void(t);
+}
+
+/* union is only a special case of struct:
+ * all its offsetof(member) == 0
+ */
+static bool btf_type_is_struct(const struct btf_type *t)
+{
+ u8 kind = BTF_INFO_KIND(t->info);
+
+ return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
+static bool btf_type_is_array(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
+}
+
+static bool btf_type_is_ptr(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
+}
+
+static bool btf_type_is_int(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
+}
+
+/* What types need to be resolved?
+ *
+ * btf_type_is_modifier() is an obvious one.
+ *
+ * btf_type_is_struct() because its member refers to
+ * another type (through member->type).
+ *
+ * btf_type_is_array() because its element (array->type)
+ * refers to another type. An array can be thought of as a
+ * special case of a struct, where the same member type is
+ * repeated array->nelems times.
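+ *
+ * For example (illustrative), "int a[4]" resolves like a struct
+ * with four int members at bits_offset 0, 32, 64 and 96.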
+ */ +static bool btf_type_needs_resolve(const struct btf_type *t) +{ + return btf_type_is_modifier(t) || + btf_type_is_ptr(t) || + btf_type_is_struct(t) || + btf_type_is_array(t); +} + +/* t->size can be used */ +static bool btf_type_has_size(const struct btf_type *t) +{ + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + return true; + } + + return false; +} + +static const char *btf_int_encoding_str(u8 encoding) +{ + if (encoding == 0) + return "(none)"; + else if (encoding == BTF_INT_SIGNED) + return "SIGNED"; + else if (encoding == BTF_INT_CHAR) + return "CHAR"; + else if (encoding == BTF_INT_BOOL) + return "BOOL"; + else if (encoding == BTF_INT_VARARGS) + return "VARARGS"; + else + return "UNKN"; +} + +static u16 btf_type_vlen(const struct btf_type *t) +{ + return BTF_INFO_VLEN(t->info); +} + +static u32 btf_type_int(const struct btf_type *t) +{ + return *(u32 *)(t + 1); +} + +static const struct btf_array *btf_type_array(const struct btf_type *t) +{ + return (const struct btf_array *)(t + 1); +} + +static const struct btf_member *btf_type_member(const struct btf_type *t) +{ + return (const struct btf_member *)(t + 1); +} + +static const struct btf_enum *btf_type_enum(const struct btf_type *t) +{ + return (const struct btf_enum *)(t + 1); +} + +static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) +{ + return kind_ops[BTF_INFO_KIND(t->info)]; +} + +static bool btf_name_offset_valid(const struct btf *btf, u32 offset) +{ + return !BTF_STR_TBL_ELF_ID(offset) && + BTF_STR_OFFSET(offset) < btf->hdr->str_len; +} + +static const char *btf_name_by_offset(const struct btf *btf, u32 offset) +{ + if (!BTF_STR_OFFSET(offset)) + return "(anon)"; + else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len) + return &btf->strings[BTF_STR_OFFSET(offset)]; + else + return "(invalid-name-offset)"; +} + +static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) +{ + if (type_id > btf->nr_types) + return NULL; + + return btf->types[type_id]; +} + +__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log, + const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + bpf_verifier_vlog(log, fmt, args); + va_end(args); +} + +__printf(2, 3) static void btf_verifier_log(struct btf_verifier_env *env, + const char *fmt, ...) +{ + struct bpf_verifier_log *log = &env->log; + va_list args; + + if (!bpf_verifier_log_needed(log)) + return; + + va_start(args, fmt); + bpf_verifier_vlog(log, fmt, args); + va_end(args); +} + +__printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env, + const struct btf_type *t, + bool log_details, + const char *fmt, ...) +{ + struct bpf_verifier_log *log = &env->log; + u8 kind = BTF_INFO_KIND(t->info); + struct btf *btf = env->btf; + va_list args; + + if (!bpf_verifier_log_needed(log)) + return; + + __btf_verifier_log(log, "[%u] %s %s%s", + env->log_type_id, + btf_kind_str[kind], + btf_name_by_offset(btf, t->name_off), + log_details ? " " : ""); + + if (log_details) + btf_type_ops(t)->log_details(env, t); + + if (fmt && *fmt) { + __btf_verifier_log(log, " "); + va_start(args, fmt); + bpf_verifier_vlog(log, fmt, args); + va_end(args); + } + + __btf_verifier_log(log, "\n"); +} + +#define btf_verifier_log_type(env, t, ...) \ + __btf_verifier_log_type((env), (t), true, __VA_ARGS__) +#define btf_verifier_log_basic(env, t, ...) 
\ + __btf_verifier_log_type((env), (t), false, __VA_ARGS__) + +__printf(4, 5) +static void btf_verifier_log_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const char *fmt, ...) +{ + struct bpf_verifier_log *log = &env->log; + struct btf *btf = env->btf; + va_list args; + + if (!bpf_verifier_log_needed(log)) + return; + + /* The CHECK_META phase already did a btf dump. + * + * If member is logged again, it must hit an error in + * parsing this member. It is useful to print out which + * struct this member belongs to. + */ + if (env->phase != CHECK_META) + btf_verifier_log_type(env, struct_type, NULL); + + __btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u", + btf_name_by_offset(btf, member->name_off), + member->type, member->offset); + + if (fmt && *fmt) { + __btf_verifier_log(log, " "); + va_start(args, fmt); + bpf_verifier_vlog(log, fmt, args); + va_end(args); + } + + __btf_verifier_log(log, "\n"); +} + +static void btf_verifier_log_hdr(struct btf_verifier_env *env) +{ + struct bpf_verifier_log *log = &env->log; + const struct btf *btf = env->btf; + const struct btf_header *hdr; + + if (!bpf_verifier_log_needed(log)) + return; + + hdr = btf->hdr; + __btf_verifier_log(log, "magic: 0x%x\n", hdr->magic); + __btf_verifier_log(log, "version: %u\n", hdr->version); + __btf_verifier_log(log, "flags: 0x%x\n", hdr->flags); + __btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label); + __btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name); + __btf_verifier_log(log, "label_off: %u\n", hdr->label_off); + __btf_verifier_log(log, "object_off: %u\n", hdr->object_off); + __btf_verifier_log(log, "func_off: %u\n", hdr->func_off); + __btf_verifier_log(log, "type_off: %u\n", hdr->type_off); + __btf_verifier_log(log, "str_off: %u\n", hdr->str_off); + __btf_verifier_log(log, "str_len: %u\n", hdr->str_len); + __btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size); +} + +static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) +{ + struct btf *btf = env->btf; + + /* < 2 because +1 for btf_void which is always in btf->types[0]. + * btf_void is not accounted in btf->nr_types because btf_void + * does not come from the BTF file. 
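+ *
+ * For example (illustrative), with types_size == 4 and
+ * nr_types == 3, slots 0-3 are all used (btf_void plus three
+ * types), so adding one more btf_type triggers an expansion.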
+ */ + if (btf->types_size - btf->nr_types < 2) { + /* Expand 'types' array */ + + struct btf_type **new_types; + u32 expand_by, new_size; + + if (btf->types_size == BTF_MAX_NR_TYPES) { + btf_verifier_log(env, "Exceeded max num of types"); + return -E2BIG; + } + + expand_by = max_t(u32, btf->types_size >> 2, 16); + new_size = min_t(u32, BTF_MAX_NR_TYPES, + btf->types_size + expand_by); + + new_types = kvzalloc(new_size * sizeof(*new_types), + GFP_KERNEL | __GFP_NOWARN); + if (!new_types) + return -ENOMEM; + + if (btf->nr_types == 0) + new_types[0] = &btf_void; + else + memcpy(new_types, btf->types, + sizeof(*btf->types) * (btf->nr_types + 1)); + + kvfree(btf->types); + btf->types = new_types; + btf->types_size = new_size; + } + + btf->types[++(btf->nr_types)] = t; + + return 0; +} + +static void btf_free(struct btf *btf) +{ + kvfree(btf->types); + kvfree(btf->resolved_sizes); + kvfree(btf->resolved_ids); + kvfree(btf->data); + kfree(btf); +} + +static void btf_get(struct btf *btf) +{ + refcount_inc(&btf->refcnt); +} + +void btf_put(struct btf *btf) +{ + if (btf && refcount_dec_and_test(&btf->refcnt)) + btf_free(btf); +} + +static int env_resolve_init(struct btf_verifier_env *env) +{ + struct btf *btf = env->btf; + u32 nr_types = btf->nr_types; + u32 *resolved_sizes = NULL; + u32 *resolved_ids = NULL; + u8 *visit_states = NULL; + + /* +1 for btf_void */ + resolved_sizes = kvzalloc((nr_types + 1) * sizeof(*resolved_sizes), + GFP_KERNEL | __GFP_NOWARN); + if (!resolved_sizes) + goto nomem; + + resolved_ids = kvzalloc((nr_types + 1) * sizeof(*resolved_ids), + GFP_KERNEL | __GFP_NOWARN); + if (!resolved_ids) + goto nomem; + + visit_states = kvzalloc((nr_types + 1) * sizeof(*visit_states), + GFP_KERNEL | __GFP_NOWARN); + if (!visit_states) + goto nomem; + + btf->resolved_sizes = resolved_sizes; + btf->resolved_ids = resolved_ids; + env->visit_states = visit_states; + + return 0; + +nomem: + kvfree(resolved_sizes); + kvfree(resolved_ids); + kvfree(visit_states); + return -ENOMEM; +} + +static void btf_verifier_env_free(struct btf_verifier_env *env) +{ + kvfree(env->visit_states); + kfree(env); +} + +static bool env_type_is_resolve_sink(const struct btf_verifier_env *env, + const struct btf_type *next_type) +{ + switch (env->resolve_mode) { + case RESOLVE_TBD: + /* int, enum or void is a sink */ + return !btf_type_needs_resolve(next_type); + case RESOLVE_PTR: + /* int, enum, void, struct or array is a sink for ptr */ + return !btf_type_is_modifier(next_type) && + !btf_type_is_ptr(next_type); + case RESOLVE_STRUCT_OR_ARRAY: + /* int, enum, void or ptr is a sink for struct and array */ + return !btf_type_is_modifier(next_type) && + !btf_type_is_array(next_type) && + !btf_type_is_struct(next_type); + default: + BUG_ON(1); + } +} + +static bool env_type_is_resolved(const struct btf_verifier_env *env, + u32 type_id) +{ + return env->visit_states[type_id] == RESOLVED; +} + +static int env_stack_push(struct btf_verifier_env *env, + const struct btf_type *t, u32 type_id) +{ + struct resolve_vertex *v; + + if (env->top_stack == MAX_RESOLVE_DEPTH) + return -E2BIG; + + if (env->visit_states[type_id] != NOT_VISITED) + return -EEXIST; + + env->visit_states[type_id] = VISITED; + + v = &env->stack[env->top_stack++]; + v->t = t; + v->type_id = type_id; + v->next_member = 0; + + if (env->resolve_mode == RESOLVE_TBD) { + if (btf_type_is_ptr(t)) + env->resolve_mode = RESOLVE_PTR; + else if (btf_type_is_struct(t) || btf_type_is_array(t)) + env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY; + } + + return 0; +} + +static 
void env_stack_set_next_member(struct btf_verifier_env *env, + u16 next_member) +{ + env->stack[env->top_stack - 1].next_member = next_member; +} + +static void env_stack_pop_resolved(struct btf_verifier_env *env, + u32 resolved_type_id, + u32 resolved_size) +{ + u32 type_id = env->stack[--(env->top_stack)].type_id; + struct btf *btf = env->btf; + + btf->resolved_sizes[type_id] = resolved_size; + btf->resolved_ids[type_id] = resolved_type_id; + env->visit_states[type_id] = RESOLVED; +} + +static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env) +{ + return env->top_stack ? &env->stack[env->top_stack - 1] : NULL; +} + +/* The input param "type_id" must point to a needs_resolve type */ +static const struct btf_type *btf_type_id_resolve(const struct btf *btf, + u32 *type_id) +{ + *type_id = btf->resolved_ids[*type_id]; + return btf_type_by_id(btf, *type_id); +} + +const struct btf_type *btf_type_id_size(const struct btf *btf, + u32 *type_id, u32 *ret_size) +{ + const struct btf_type *size_type; + u32 size_type_id = *type_id; + u32 size = 0; + + size_type = btf_type_by_id(btf, size_type_id); + if (btf_type_is_void_or_null(size_type)) + return NULL; + + if (btf_type_has_size(size_type)) { + size = size_type->size; + } else if (btf_type_is_array(size_type)) { + size = btf->resolved_sizes[size_type_id]; + } else if (btf_type_is_ptr(size_type)) { + size = sizeof(void *); + } else { + if (WARN_ON_ONCE(!btf_type_is_modifier(size_type))) + return NULL; + + size = btf->resolved_sizes[size_type_id]; + size_type_id = btf->resolved_ids[size_type_id]; + size_type = btf_type_by_id(btf, size_type_id); + if (btf_type_is_void(size_type)) + return NULL; + } + + *type_id = size_type_id; + if (ret_size) + *ret_size = size; + + return size_type; +} + +static int btf_df_check_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + btf_verifier_log_basic(env, struct_type, + "Unsupported check_member"); + return -EINVAL; +} + +static int btf_df_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + btf_verifier_log_basic(env, v->t, "Unsupported resolve"); + return -EINVAL; +} + +static void btf_df_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offsets, + struct seq_file *m) +{ + seq_printf(m, "<unsupported kind:%u>", BTF_INFO_KIND(t->info)); +} + +static int btf_int_check_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + u32 int_data = btf_type_int(member_type); + u32 struct_bits_off = member->offset; + u32 struct_size = struct_type->size; + u32 nr_copy_bits; + u32 bytes_offset; + + if (U32_MAX - struct_bits_off < BTF_INT_OFFSET(int_data)) { + btf_verifier_log_member(env, struct_type, member, + "bits_offset exceeds U32_MAX"); + return -EINVAL; + } + + struct_bits_off += BTF_INT_OFFSET(int_data); + bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); + nr_copy_bits = BTF_INT_BITS(int_data) + + BITS_PER_BYTE_MASKED(struct_bits_off); + + if (nr_copy_bits > BITS_PER_U64) { + btf_verifier_log_member(env, struct_type, member, + "nr_copy_bits exceeds 64"); + return -EINVAL; + } + + if (struct_size < bytes_offset || + struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) { + btf_verifier_log_member(env, struct_type, member, + "Member exceeds struct_size"); + return -EINVAL; + } + + return 0; +} + +static s32 
btf_int_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + u32 int_data, nr_bits, meta_needed = sizeof(int_data); + u16 encoding; + + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + if (btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen != 0"); + return -EINVAL; + } + + int_data = btf_type_int(t); + nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data); + + if (nr_bits > BITS_PER_U64) { + btf_verifier_log_type(env, t, "nr_bits exceeds %zu", + BITS_PER_U64); + return -EINVAL; + } + + if (BITS_ROUNDUP_BYTES(nr_bits) > t->size) { + btf_verifier_log_type(env, t, "nr_bits exceeds type_size"); + return -EINVAL; + } + + encoding = BTF_INT_ENCODING(int_data); + if (encoding && + encoding != BTF_INT_SIGNED && + encoding != BTF_INT_CHAR && + encoding != BTF_INT_BOOL && + encoding != BTF_INT_VARARGS) { + btf_verifier_log_type(env, t, "Unsupported encoding"); + return -ENOTSUPP; + } + + btf_verifier_log_type(env, t, NULL); + + return meta_needed; +} + +static void btf_int_log(struct btf_verifier_env *env, + const struct btf_type *t) +{ + int int_data = btf_type_int(t); + + btf_verifier_log(env, + "size=%u bits_offset=%u nr_bits=%u encoding=%s", + t->size, BTF_INT_OFFSET(int_data), + BTF_INT_BITS(int_data), + btf_int_encoding_str(BTF_INT_ENCODING(int_data))); +} + +static void btf_int_bits_seq_show(const struct btf *btf, + const struct btf_type *t, + void *data, u8 bits_offset, + struct seq_file *m) +{ + u32 int_data = btf_type_int(t); + u16 nr_bits = BTF_INT_BITS(int_data); + u16 total_bits_offset; + u16 nr_copy_bytes; + u16 nr_copy_bits; + u8 nr_upper_bits; + union { + u64 u64_num; + u8 u8_nums[8]; + } print_num; + + total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); + data += BITS_ROUNDDOWN_BYTES(total_bits_offset); + bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); + nr_copy_bits = nr_bits + bits_offset; + nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); + + print_num.u64_num = 0; + memcpy(&print_num.u64_num, data, nr_copy_bytes); + + /* Ditch the higher order bits */ + nr_upper_bits = BITS_PER_BYTE_MASKED(nr_copy_bits); + if (nr_upper_bits) { + /* We need to mask out some bits of the upper byte. 
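 For example (illustrative): with nr_copy_bits == 13, nr_copy_bytes is 2, nr_upper_bits is 5 and the mask is 0x1f, so only the low 5 bits of the last byte are kept. 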
*/ + u8 mask = (1 << nr_upper_bits) - 1; + + print_num.u8_nums[nr_copy_bytes - 1] &= mask; + } + + print_num.u64_num >>= bits_offset; + + seq_printf(m, "0x%llx", print_num.u64_num); +} + +static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + u32 int_data = btf_type_int(t); + u8 encoding = BTF_INT_ENCODING(int_data); + bool sign = encoding & BTF_INT_SIGNED; + u32 nr_bits = BTF_INT_BITS(int_data); + + if (bits_offset || BTF_INT_OFFSET(int_data) || + BITS_PER_BYTE_MASKED(nr_bits)) { + btf_int_bits_seq_show(btf, t, data, bits_offset, m); + return; + } + + switch (nr_bits) { + case 64: + if (sign) + seq_printf(m, "%lld", *(s64 *)data); + else + seq_printf(m, "%llu", *(u64 *)data); + break; + case 32: + if (sign) + seq_printf(m, "%d", *(s32 *)data); + else + seq_printf(m, "%u", *(u32 *)data); + break; + case 16: + if (sign) + seq_printf(m, "%d", *(s16 *)data); + else + seq_printf(m, "%u", *(u16 *)data); + break; + case 8: + if (sign) + seq_printf(m, "%d", *(s8 *)data); + else + seq_printf(m, "%u", *(u8 *)data); + break; + default: + btf_int_bits_seq_show(btf, t, data, bits_offset, m); + } +} + +static const struct btf_kind_operations int_ops = { + .check_meta = btf_int_check_meta, + .resolve = btf_df_resolve, + .check_member = btf_int_check_member, + .log_details = btf_int_log, + .seq_show = btf_int_seq_show, +}; + +static int btf_modifier_check_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + const struct btf_type *resolved_type; + u32 resolved_type_id = member->type; + struct btf_member resolved_member; + struct btf *btf = env->btf; + + resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL); + if (!resolved_type) { + btf_verifier_log_member(env, struct_type, member, + "Invalid member"); + return -EINVAL; + } + + resolved_member = *member; + resolved_member.type = resolved_type_id; + + return btf_type_ops(resolved_type)->check_member(env, struct_type, + &resolved_member, + resolved_type); +} + +static int btf_ptr_check_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + u32 struct_size, struct_bits_off, bytes_offset; + + struct_size = struct_type->size; + struct_bits_off = member->offset; + bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); + + if (BITS_PER_BYTE_MASKED(struct_bits_off)) { + btf_verifier_log_member(env, struct_type, member, + "Member is not byte aligned"); + return -EINVAL; + } + + if (struct_size - bytes_offset < sizeof(void *)) { + btf_verifier_log_member(env, struct_type, member, + "Member exceeds struct_size"); + return -EINVAL; + } + + return 0; +} + +static int btf_ref_type_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + if (btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen != 0"); + return -EINVAL; + } + + if (BTF_TYPE_PARENT(t->type)) { + btf_verifier_log_type(env, t, "Invalid type_id"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + return 0; +} + +static int btf_modifier_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_type *t = v->t; + const struct btf_type *next_type; + u32 next_type_id = t->type; + struct btf *btf = env->btf; + u32 next_type_size = 0; + + next_type = btf_type_by_id(btf, next_type_id); + if (!next_type) { + 
btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ /* "typedef void new_void", "const void"...etc */
+ if (btf_type_is_void(next_type))
+ goto resolved;
+
+ if (!env_type_is_resolve_sink(env, next_type) &&
+ !env_type_is_resolved(env, next_type_id))
+ return env_stack_push(env, next_type, next_type_id);
+
+ /* Figure out the resolved next_type_id with size.
+ * They will be stored in the current modifier's
+ * resolved_ids and resolved_sizes so that they can
+ * save us a few type-following steps when we use it
+ * later (e.g. in pretty printing).
+ */
+ if (!btf_type_id_size(btf, &next_type_id, &next_type_size) &&
+ !btf_type_is_void(btf_type_id_resolve(btf, &next_type_id))) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+resolved:
+ env_stack_pop_resolved(env, next_type_id, next_type_size);
+
+ return 0;
+}
+
+static int btf_ptr_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_type *next_type;
+ const struct btf_type *t = v->t;
+ u32 next_type_id = t->type;
+ struct btf *btf = env->btf;
+ u32 next_type_size = 0;
+
+ next_type = btf_type_by_id(btf, next_type_id);
+ if (!next_type) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ /* "void *" */
+ if (btf_type_is_void(next_type))
+ goto resolved;
+
+ if (!env_type_is_resolve_sink(env, next_type) &&
+ !env_type_is_resolved(env, next_type_id))
+ return env_stack_push(env, next_type, next_type_id);
+
+ /* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY,
+ * the modifier may have stopped resolving when it was resolved
+ * to a ptr (last-resolved-ptr).
+ *
+ * We now need to continue from the last-resolved-ptr to
+ * ensure the last-resolved-ptr does not refer back to
+ * the current ptr (t).
+ */ + if (btf_type_is_modifier(next_type)) { + const struct btf_type *resolved_type; + u32 resolved_type_id; + + resolved_type_id = next_type_id; + resolved_type = btf_type_id_resolve(btf, &resolved_type_id); + + if (btf_type_is_ptr(resolved_type) && + !env_type_is_resolve_sink(env, resolved_type) && + !env_type_is_resolved(env, resolved_type_id)) + return env_stack_push(env, resolved_type, + resolved_type_id); + } + + if (!btf_type_id_size(btf, &next_type_id, &next_type_size) && + !btf_type_is_void(btf_type_id_resolve(btf, &next_type_id))) { + btf_verifier_log_type(env, v->t, "Invalid type_id"); + return -EINVAL; + } + +resolved: + env_stack_pop_resolved(env, next_type_id, 0); + + return 0; +} + +static void btf_modifier_seq_show(const struct btf *btf, + const struct btf_type *t, + u32 type_id, void *data, + u8 bits_offset, struct seq_file *m) +{ + t = btf_type_id_resolve(btf, &type_id); + + btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); +} + +static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + /* It is a hashed value */ + seq_printf(m, "%p", *(void **)data); +} + +static void btf_ref_type_log(struct btf_verifier_env *env, + const struct btf_type *t) +{ + btf_verifier_log(env, "type_id=%u", t->type); +} + +static struct btf_kind_operations modifier_ops = { + .check_meta = btf_ref_type_check_meta, + .resolve = btf_modifier_resolve, + .check_member = btf_modifier_check_member, + .log_details = btf_ref_type_log, + .seq_show = btf_modifier_seq_show, +}; + +static struct btf_kind_operations ptr_ops = { + .check_meta = btf_ref_type_check_meta, + .resolve = btf_ptr_resolve, + .check_member = btf_ptr_check_member, + .log_details = btf_ref_type_log, + .seq_show = btf_ptr_seq_show, +}; + +static struct btf_kind_operations fwd_ops = { + .check_meta = btf_ref_type_check_meta, + .resolve = btf_df_resolve, + .check_member = btf_df_check_member, + .log_details = btf_ref_type_log, + .seq_show = btf_df_seq_show, +}; + +static int btf_array_check_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + u32 struct_bits_off = member->offset; + u32 struct_size, bytes_offset; + u32 array_type_id, array_size; + struct btf *btf = env->btf; + + if (BITS_PER_BYTE_MASKED(struct_bits_off)) { + btf_verifier_log_member(env, struct_type, member, + "Member is not byte aligned"); + return -EINVAL; + } + + array_type_id = member->type; + btf_type_id_size(btf, &array_type_id, &array_size); + struct_size = struct_type->size; + bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); + if (struct_size - bytes_offset < array_size) { + btf_verifier_log_member(env, struct_type, member, + "Member exceeds struct_size"); + return -EINVAL; + } + + return 0; +} + +static s32 btf_array_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + const struct btf_array *array = btf_type_array(t); + u32 meta_needed = sizeof(*array); + + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + if (btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen != 0"); + return -EINVAL; + } + + /* We are a little forgiving on array->index_type since + * the kernel is not using it. + */ + /* Array elem cannot be in type void, + * so !array->type is not allowed. 
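+ * (An array of "void" elements would have no element size,
+ * so the total array size could not be determined.)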
+ */ + if (!array->type || BTF_TYPE_PARENT(array->type)) { + btf_verifier_log_type(env, t, "Invalid type_id"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + return meta_needed; +} + +static int btf_array_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_array *array = btf_type_array(v->t); + const struct btf_type *elem_type; + u32 elem_type_id = array->type; + struct btf *btf = env->btf; + u32 elem_size; + + elem_type = btf_type_by_id(btf, elem_type_id); + if (btf_type_is_void_or_null(elem_type)) { + btf_verifier_log_type(env, v->t, + "Invalid elem"); + return -EINVAL; + } + + if (!env_type_is_resolve_sink(env, elem_type) && + !env_type_is_resolved(env, elem_type_id)) + return env_stack_push(env, elem_type, elem_type_id); + + elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); + if (!elem_type) { + btf_verifier_log_type(env, v->t, "Invalid elem"); + return -EINVAL; + } + + if (btf_type_is_int(elem_type)) { + int int_type_data = btf_type_int(elem_type); + u16 nr_bits = BTF_INT_BITS(int_type_data); + u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits); + + /* Put more restriction on array of int. The int cannot + * be a bit field and it must be either u8/u16/u32/u64. + */ + if (BITS_PER_BYTE_MASKED(nr_bits) || + BTF_INT_OFFSET(int_type_data) || + (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && + nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) { + btf_verifier_log_type(env, v->t, + "Invalid array of int"); + return -EINVAL; + } + } + + if (array->nelems && elem_size > U32_MAX / array->nelems) { + btf_verifier_log_type(env, v->t, + "Array size overflows U32_MAX"); + return -EINVAL; + } + + env_stack_pop_resolved(env, elem_type_id, elem_size * array->nelems); + + return 0; +} + +static void btf_array_log(struct btf_verifier_env *env, + const struct btf_type *t) +{ + const struct btf_array *array = btf_type_array(t); + + btf_verifier_log(env, "type_id=%u index_type_id=%u nr_elems=%u", + array->type, array->index_type, array->nelems); +} + +static void btf_array_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + const struct btf_array *array = btf_type_array(t); + const struct btf_kind_operations *elem_ops; + const struct btf_type *elem_type; + u32 i, elem_size, elem_type_id; + + elem_type_id = array->type; + elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); + elem_ops = btf_type_ops(elem_type); + seq_puts(m, "["); + for (i = 0; i < array->nelems; i++) { + if (i) + seq_puts(m, ","); + + elem_ops->seq_show(btf, elem_type, elem_type_id, data, + bits_offset, m); + data += elem_size; + } + seq_puts(m, "]"); +} + +static struct btf_kind_operations array_ops = { + .check_meta = btf_array_check_meta, + .resolve = btf_array_resolve, + .check_member = btf_array_check_member, + .log_details = btf_array_log, + .seq_show = btf_array_seq_show, +}; + +static int btf_struct_check_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + u32 struct_bits_off = member->offset; + u32 struct_size, bytes_offset; + + if (BITS_PER_BYTE_MASKED(struct_bits_off)) { + btf_verifier_log_member(env, struct_type, member, + "Member is not byte aligned"); + return -EINVAL; + } + + struct_size = struct_type->size; + bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); + if (struct_size - bytes_offset < member_type->size) { + btf_verifier_log_member(env, struct_type, 
member, + "Member exceeds struct_size"); + return -EINVAL; + } + + return 0; +} + +static s32 btf_struct_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION; + const struct btf_member *member; + struct btf *btf = env->btf; + u32 struct_size = t->size; + u32 meta_needed; + u16 i; + + meta_needed = btf_type_vlen(t) * sizeof(*member); + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + for_each_member(i, t, member) { + if (!btf_name_offset_valid(btf, member->name_off)) { + btf_verifier_log_member(env, t, member, + "Invalid member name_offset:%u", + member->name_off); + return -EINVAL; + } + + /* A member cannot be in type void */ + if (!member->type || BTF_TYPE_PARENT(member->type)) { + btf_verifier_log_member(env, t, member, + "Invalid type_id"); + return -EINVAL; + } + + if (is_union && member->offset) { + btf_verifier_log_member(env, t, member, + "Invalid member bits_offset"); + return -EINVAL; + } + + if (BITS_ROUNDUP_BYTES(member->offset) > struct_size) { + btf_verifier_log_member(env, t, member, + "Member bits_offset exceeds its struct size"); + return -EINVAL; + } + + btf_verifier_log_member(env, t, member, NULL); + } + + return meta_needed; +} + +static int btf_struct_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_member *member; + int err; + u16 i; + + /* Before continuing to resolve the next_member, + * ensure the last member is indeed resolved to a + * type with size info. + */ + if (v->next_member) { + const struct btf_type *last_member_type; + const struct btf_member *last_member; + u16 last_member_type_id; + + last_member = btf_type_member(v->t) + v->next_member - 1; + last_member_type_id = last_member->type; + if (WARN_ON_ONCE(!env_type_is_resolved(env, + last_member_type_id))) + return -EINVAL; + + last_member_type = btf_type_by_id(env->btf, + last_member_type_id); + err = btf_type_ops(last_member_type)->check_member(env, v->t, + last_member, + last_member_type); + if (err) + return err; + } + + for_each_member_from(i, v->next_member, v->t, member) { + u32 member_type_id = member->type; + const struct btf_type *member_type = btf_type_by_id(env->btf, + member_type_id); + + if (btf_type_is_void_or_null(member_type)) { + btf_verifier_log_member(env, v->t, member, + "Invalid member"); + return -EINVAL; + } + + if (!env_type_is_resolve_sink(env, member_type) && + !env_type_is_resolved(env, member_type_id)) { + env_stack_set_next_member(env, i + 1); + return env_stack_push(env, member_type, member_type_id); + } + + err = btf_type_ops(member_type)->check_member(env, v->t, + member, + member_type); + if (err) + return err; + } + + env_stack_pop_resolved(env, 0, 0); + + return 0; +} + +static void btf_struct_log(struct btf_verifier_env *env, + const struct btf_type *t) +{ + btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); +} + +static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + const char *seq = BTF_INFO_KIND(t->info) == BTF_KIND_UNION ? 
"|" : ","; + const struct btf_member *member; + u32 i; + + seq_puts(m, "{"); + for_each_member(i, t, member) { + const struct btf_type *member_type = btf_type_by_id(btf, + member->type); + u32 member_offset = member->offset; + u32 bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); + u8 bits8_offset = BITS_PER_BYTE_MASKED(member_offset); + const struct btf_kind_operations *ops; + + if (i) + seq_puts(m, seq); + + ops = btf_type_ops(member_type); + ops->seq_show(btf, member_type, member->type, + data + bytes_offset, bits8_offset, m); + } + seq_puts(m, "}"); +} + +static struct btf_kind_operations struct_ops = { + .check_meta = btf_struct_check_meta, + .resolve = btf_struct_resolve, + .check_member = btf_struct_check_member, + .log_details = btf_struct_log, + .seq_show = btf_struct_seq_show, +}; + +static int btf_enum_check_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + u32 struct_bits_off = member->offset; + u32 struct_size, bytes_offset; + + if (BITS_PER_BYTE_MASKED(struct_bits_off)) { + btf_verifier_log_member(env, struct_type, member, + "Member is not byte aligned"); + return -EINVAL; + } + + struct_size = struct_type->size; + bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); + if (struct_size - bytes_offset < sizeof(int)) { + btf_verifier_log_member(env, struct_type, member, + "Member exceeds struct_size"); + return -EINVAL; + } + + return 0; +} + +static s32 btf_enum_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + const struct btf_enum *enums = btf_type_enum(t); + struct btf *btf = env->btf; + u16 i, nr_enums; + u32 meta_needed; + + nr_enums = btf_type_vlen(t); + meta_needed = nr_enums * sizeof(*enums); + + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + if (t->size != sizeof(int)) { + btf_verifier_log_type(env, t, "Expected size:%zu", + sizeof(int)); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + for (i = 0; i < nr_enums; i++) { + if (!btf_name_offset_valid(btf, enums[i].name_off)) { + btf_verifier_log(env, "\tInvalid name_offset:%u", + enums[i].name_off); + return -EINVAL; + } + + btf_verifier_log(env, "\t%s val=%d\n", + btf_name_by_offset(btf, enums[i].name_off), + enums[i].val); + } + + return meta_needed; +} + +static void btf_enum_log(struct btf_verifier_env *env, + const struct btf_type *t) +{ + btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); +} + +static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + const struct btf_enum *enums = btf_type_enum(t); + u32 i, nr_enums = btf_type_vlen(t); + int v = *(int *)data; + + for (i = 0; i < nr_enums; i++) { + if (v == enums[i].val) { + seq_printf(m, "%s", + btf_name_by_offset(btf, enums[i].name_off)); + return; + } + } + + seq_printf(m, "%d", v); +} + +static struct btf_kind_operations enum_ops = { + .check_meta = btf_enum_check_meta, + .resolve = btf_df_resolve, + .check_member = btf_enum_check_member, + .log_details = btf_enum_log, + .seq_show = btf_enum_seq_show, +}; + +static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { + [BTF_KIND_INT] = &int_ops, + [BTF_KIND_PTR] = &ptr_ops, + [BTF_KIND_ARRAY] = &array_ops, + [BTF_KIND_STRUCT] = &struct_ops, + [BTF_KIND_UNION] = &struct_ops, + [BTF_KIND_ENUM] = &enum_ops, + [BTF_KIND_FWD] = 
&fwd_ops, + [BTF_KIND_TYPEDEF] = &modifier_ops, + [BTF_KIND_VOLATILE] = &modifier_ops, + [BTF_KIND_CONST] = &modifier_ops, + [BTF_KIND_RESTRICT] = &modifier_ops, +}; + +static s32 btf_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + u32 saved_meta_left = meta_left; + s32 var_meta_size; + + if (meta_left < sizeof(*t)) { + btf_verifier_log(env, "[%u] meta_left:%u meta_needed:%zu", + env->log_type_id, meta_left, sizeof(*t)); + return -EINVAL; + } + meta_left -= sizeof(*t); + + if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX || + BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) { + btf_verifier_log(env, "[%u] Invalid kind:%u", + env->log_type_id, BTF_INFO_KIND(t->info)); + return -EINVAL; + } + + if (!btf_name_offset_valid(env->btf, t->name_off)) { + btf_verifier_log(env, "[%u] Invalid name_offset:%u", + env->log_type_id, t->name_off); + return -EINVAL; + } + + var_meta_size = btf_type_ops(t)->check_meta(env, t, meta_left); + if (var_meta_size < 0) + return var_meta_size; + + meta_left -= var_meta_size; + + return saved_meta_left - meta_left; +} + +static int btf_check_all_metas(struct btf_verifier_env *env) +{ + struct btf *btf = env->btf; + struct btf_header *hdr; + void *cur, *end; + + hdr = btf->hdr; + cur = btf->nohdr_data + hdr->type_off; + end = btf->nohdr_data + hdr->str_off; + + env->log_type_id = 1; + while (cur < end) { + struct btf_type *t = cur; + s32 meta_size; + + meta_size = btf_check_meta(env, t, end - cur); + if (meta_size < 0) + return meta_size; + + btf_add_type(env, t); + cur += meta_size; + env->log_type_id++; + } + + return 0; +} + +static int btf_resolve(struct btf_verifier_env *env, + const struct btf_type *t, u32 type_id) +{ + const struct resolve_vertex *v; + int err = 0; + + env->resolve_mode = RESOLVE_TBD; + env_stack_push(env, t, type_id); + while (!err && (v = env_stack_peak(env))) { + env->log_type_id = v->type_id; + err = btf_type_ops(v->t)->resolve(env, v); + } + + env->log_type_id = type_id; + if (err == -E2BIG) + btf_verifier_log_type(env, t, + "Exceeded max resolving depth:%u", + MAX_RESOLVE_DEPTH); + else if (err == -EEXIST) + btf_verifier_log_type(env, t, "Loop detected"); + + return err; +} + +static bool btf_resolve_valid(struct btf_verifier_env *env, + const struct btf_type *t, + u32 type_id) +{ + struct btf *btf = env->btf; + + if (!env_type_is_resolved(env, type_id)) + return false; + + if (btf_type_is_struct(t)) + return !btf->resolved_ids[type_id] && + !btf->resolved_sizes[type_id]; + + if (btf_type_is_modifier(t) || btf_type_is_ptr(t)) { + t = btf_type_id_resolve(btf, &type_id); + return t && !btf_type_is_modifier(t); + } + + if (btf_type_is_array(t)) { + const struct btf_array *array = btf_type_array(t); + const struct btf_type *elem_type; + u32 elem_type_id = array->type; + u32 elem_size; + + elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); + return elem_type && !btf_type_is_modifier(elem_type) && + (array->nelems * elem_size == + btf->resolved_sizes[type_id]); + } + + return false; +} + +static int btf_check_all_types(struct btf_verifier_env *env) +{ + struct btf *btf = env->btf; + u32 type_id; + int err; + + err = env_resolve_init(env); + if (err) + return err; + + env->phase++; + for (type_id = 1; type_id <= btf->nr_types; type_id++) { + const struct btf_type *t = btf_type_by_id(btf, type_id); + + env->log_type_id = type_id; + if (btf_type_needs_resolve(t) && + !env_type_is_resolved(env, type_id)) { + err = btf_resolve(env, t, type_id); + if (err) + return err; + } + + if (btf_type_needs_resolve(t) && 
+ !btf_resolve_valid(env, t, type_id)) { + btf_verifier_log_type(env, t, "Invalid resolve state"); + return -EINVAL; + } + } + + return 0; +} + +static int btf_parse_type_sec(struct btf_verifier_env *env) +{ + int err; + + err = btf_check_all_metas(env); + if (err) + return err; + + return btf_check_all_types(env); +} + +static int btf_parse_str_sec(struct btf_verifier_env *env) +{ + const struct btf_header *hdr; + struct btf *btf = env->btf; + const char *start, *end; + + hdr = btf->hdr; + start = btf->nohdr_data + hdr->str_off; + end = start + hdr->str_len; + + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || + start[0] || end[-1]) { + btf_verifier_log(env, "Invalid string section"); + return -EINVAL; + } + + btf->strings = start; + + return 0; +} + +static int btf_parse_hdr(struct btf_verifier_env *env) +{ + const struct btf_header *hdr; + struct btf *btf = env->btf; + u32 meta_left; + + if (btf->data_size < sizeof(*hdr)) { + btf_verifier_log(env, "btf_header not found"); + return -EINVAL; + } + + btf_verifier_log_hdr(env); + + hdr = btf->hdr; + if (hdr->magic != BTF_MAGIC) { + btf_verifier_log(env, "Invalid magic"); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + btf_verifier_log(env, "Unsupported version"); + return -ENOTSUPP; + } + + if (hdr->flags) { + btf_verifier_log(env, "Unsupported flags"); + return -ENOTSUPP; + } + + meta_left = btf->data_size - sizeof(*hdr); + if (!meta_left) { + btf_verifier_log(env, "No data"); + return -EINVAL; + } + + if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off || + /* Type section must align to 4 bytes */ + hdr->type_off & (sizeof(u32) - 1)) { + btf_verifier_log(env, "Invalid type_off"); + return -EINVAL; + } + + if (meta_left < hdr->str_off || + meta_left - hdr->str_off < hdr->str_len) { + btf_verifier_log(env, "Invalid str_off or str_len"); + return -EINVAL; + } + + btf->nohdr_data = btf->hdr + 1; + + return 0; +} + +static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, + u32 log_level, char __user *log_ubuf, u32 log_size) +{ + struct btf_verifier_env *env = NULL; + struct bpf_verifier_log *log; + struct btf *btf = NULL; + u8 *data; + int err; + + if (btf_data_size > BTF_MAX_SIZE) + return ERR_PTR(-E2BIG); + + env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); + if (!env) + return ERR_PTR(-ENOMEM); + + log = &env->log; + if (log_level || log_ubuf || log_size) { + /* user requested verbose verifier output + * and supplied buffer to store the verification trace + */ + log->level = log_level; + log->ubuf = log_ubuf; + log->len_total = log_size; + + /* log attributes have to be sane */ + if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || + !log->level || !log->ubuf) { + err = -EINVAL; + goto errout; + } + } + + btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); + if (!btf) { + err = -ENOMEM; + goto errout; + } + + data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN); + if (!data) { + err = -ENOMEM; + goto errout; + } + + btf->data = data; + btf->data_size = btf_data_size; + + if (copy_from_user(data, btf_data, btf_data_size)) { + err = -EFAULT; + goto errout; + } + + env->btf = btf; + + err = btf_parse_hdr(env); + if (err) + goto errout; + + err = btf_parse_str_sec(env); + if (err) + goto errout; + + err = btf_parse_type_sec(env); + if (err) + goto errout; + + if (!err && log->level && bpf_verifier_log_full(log)) { + err = -ENOSPC; + goto errout; + } + + if (!err) { + btf_verifier_env_free(env); + btf_get(btf); + return btf; + } + +errout: + 
btf_verifier_env_free(env); + if (btf) + btf_free(btf); + return ERR_PTR(err); +} + +void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, + struct seq_file *m) +{ + const struct btf_type *t = btf_type_by_id(btf, type_id); + + btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m); +} + +static int btf_release(struct inode *inode, struct file *filp) +{ + btf_put(filp->private_data); + return 0; +} + +const struct file_operations btf_fops = { + .release = btf_release, +}; + +int btf_new_fd(const union bpf_attr *attr) +{ + struct btf *btf; + int fd; + + btf = btf_parse(u64_to_user_ptr(attr->btf), + attr->btf_size, attr->btf_log_level, + u64_to_user_ptr(attr->btf_log_buf), + attr->btf_log_size); + if (IS_ERR(btf)) + return PTR_ERR(btf); + + fd = anon_inode_getfd("btf", &btf_fops, btf, + O_RDONLY | O_CLOEXEC); + if (fd < 0) + btf_put(btf); + + return fd; +} + +struct btf *btf_get_by_fd(int fd) +{ + struct btf *btf; + struct fd f; + + f = fdget(fd); + + if (!f.file) + return ERR_PTR(-EBADF); + + if (f.file->f_op != &btf_fops) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + btf = f.file->private_data; + btf_get(btf); + fdput(f); + + return btf; +} + +int btf_get_info_by_fd(const struct btf *btf, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + void __user *udata = u64_to_user_ptr(attr->info.info); + u32 copy_len = min_t(u32, btf->data_size, + attr->info.info_len); + + if (copy_to_user(udata, btf->data, copy_len) || + put_user(btf->data_size, &uattr->info.info_len)) + return -EFAULT; + + return 0; +} diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index a4bb0b34375a..c95b04ec103e 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -19,6 +19,7 @@ #include <linux/bpf.h> #include <linux/filter.h> #include <linux/ptr_ring.h> +#include <net/xdp.h> #include <linux/sched.h> #include <linux/workqueue.h> @@ -137,27 +138,6 @@ free_cmap: return ERR_PTR(err); } -static void __cpu_map_queue_destructor(void *ptr) -{ - /* The tear-down procedure should have made sure that queue is - * empty. See __cpu_map_entry_replace() and work-queue - * invoked cpu_map_kthread_stop(). Catch any broken behaviour - * gracefully and warn once. - */ - if (WARN_ON_ONCE(ptr)) - page_frag_free(ptr); -} - -static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) -{ - if (atomic_dec_and_test(&rcpu->refcnt)) { - /* The queue should be empty at this point */ - ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor); - kfree(rcpu->queue); - kfree(rcpu); - } -} - static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) { atomic_inc(&rcpu->refcnt); @@ -179,45 +159,8 @@ static void cpu_map_kthread_stop(struct work_struct *work) kthread_stop(rcpu->kthread); } -/* For now, xdp_pkt is a cpumap internal data structure, with info - * carried between enqueue to dequeue. It is mapped into the top - * headroom of the packet, to avoid allocating separate mem. - */ -struct xdp_pkt { - void *data; - u16 len; - u16 headroom; - u16 metasize; - struct net_device *dev_rx; -}; - -/* Convert xdp_buff to xdp_pkt */ -static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp) -{ - struct xdp_pkt *xdp_pkt; - int metasize; - int headroom; - - /* Assure headroom is available for storing info */ - headroom = xdp->data - xdp->data_hard_start; - metasize = xdp->data - xdp->data_meta; - metasize = metasize > 0 ? 
metasize : 0; - if (unlikely((headroom - metasize) < sizeof(*xdp_pkt))) - return NULL; - - /* Store info in top of packet */ - xdp_pkt = xdp->data_hard_start; - - xdp_pkt->data = xdp->data; - xdp_pkt->len = xdp->data_end - xdp->data; - xdp_pkt->headroom = headroom - sizeof(*xdp_pkt); - xdp_pkt->metasize = metasize; - - return xdp_pkt; -} - static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, - struct xdp_pkt *xdp_pkt) + struct xdp_frame *xdpf) { unsigned int frame_size; void *pkt_data_start; @@ -232,7 +175,7 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * would be preferred to set frame_size to 2048 or 4096 * depending on the driver. * frame_size = 2048; - * frame_len = frame_size - sizeof(*xdp_pkt); + * frame_len = frame_size - sizeof(*xdp_frame); * * Instead, with info avail, skb_shared_info in placed after * packet len. This, unfortunately fakes the truesize. @@ -240,21 +183,21 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * is not at a fixed memory location, with mixed length * packets, which is bad for cache-line hotness. */ - frame_size = SKB_DATA_ALIGN(xdp_pkt->len) + xdp_pkt->headroom + + frame_size = SKB_DATA_ALIGN(xdpf->len) + xdpf->headroom + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - pkt_data_start = xdp_pkt->data - xdp_pkt->headroom; + pkt_data_start = xdpf->data - xdpf->headroom; skb = build_skb(pkt_data_start, frame_size); if (!skb) return NULL; - skb_reserve(skb, xdp_pkt->headroom); - __skb_put(skb, xdp_pkt->len); - if (xdp_pkt->metasize) - skb_metadata_set(skb, xdp_pkt->metasize); + skb_reserve(skb, xdpf->headroom); + __skb_put(skb, xdpf->len); + if (xdpf->metasize) + skb_metadata_set(skb, xdpf->metasize); /* Essential SKB info: protocol and skb->dev */ - skb->protocol = eth_type_trans(skb, xdp_pkt->dev_rx); + skb->protocol = eth_type_trans(skb, xdpf->dev_rx); /* Optional SKB info, currently missing: * - HW checksum info (skb->ip_summed) @@ -265,6 +208,31 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, return skb; } +static void __cpu_map_ring_cleanup(struct ptr_ring *ring) +{ + /* The tear-down procedure should have made sure that queue is + * empty. See __cpu_map_entry_replace() and work-queue + * invoked cpu_map_kthread_stop(). Catch any broken behaviour + * gracefully and warn once. + */ + struct xdp_frame *xdpf; + + while ((xdpf = ptr_ring_consume(ring))) + if (WARN_ON_ONCE(xdpf)) + xdp_return_frame(xdpf); +} + +static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) +{ + if (atomic_dec_and_test(&rcpu->refcnt)) { + /* The queue should be empty at this point */ + __cpu_map_ring_cleanup(rcpu->queue); + ptr_ring_cleanup(rcpu->queue, NULL); + kfree(rcpu->queue); + kfree(rcpu); + } +} + static int cpu_map_kthread_run(void *data) { struct bpf_cpu_map_entry *rcpu = data; @@ -278,7 +246,7 @@ static int cpu_map_kthread_run(void *data) */ while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { unsigned int processed = 0, drops = 0, sched = 0; - struct xdp_pkt *xdp_pkt; + struct xdp_frame *xdpf; /* Release CPU reschedule checks */ if (__ptr_ring_empty(rcpu->queue)) { @@ -301,13 +269,13 @@ static int cpu_map_kthread_run(void *data) * kthread CPU pinned. Lockless access to ptr_ring * consume side valid as no-resize allowed of queue. 
*/ - while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) { + while ((xdpf = __ptr_ring_consume(rcpu->queue))) { struct sk_buff *skb; int ret; - skb = cpu_map_build_skb(rcpu, xdp_pkt); + skb = cpu_map_build_skb(rcpu, xdpf); if (!skb) { - page_frag_free(xdp_pkt); + xdp_return_frame(xdpf); continue; } @@ -604,13 +572,13 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, spin_lock(&q->producer_lock); for (i = 0; i < bq->count; i++) { - void *xdp_pkt = bq->q[i]; + struct xdp_frame *xdpf = bq->q[i]; int err; - err = __ptr_ring_produce(q, xdp_pkt); + err = __ptr_ring_produce(q, xdpf); if (err) { drops++; - page_frag_free(xdp_pkt); /* Free xdp_pkt */ + xdp_return_frame(xdpf); } processed++; } @@ -625,7 +593,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, /* Runs under RCU-read-side, plus in softirq under NAPI protection. * Thus, safe percpu variable access. */ -static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt) +static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); @@ -636,28 +604,28 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt) * driver to code invoking us to finished, due to driver * (e.g. ixgbe) recycle tricks based on page-refcnt. * - * Thus, incoming xdp_pkt is always queued here (else we race + * Thus, incoming xdp_frame is always queued here (else we race * with another CPU on page-refcnt and remaining driver code). * Queue time is very short, as driver will invoke flush * operation, when completing napi->poll call. */ - bq->q[bq->count++] = xdp_pkt; + bq->q[bq->count++] = xdpf; return 0; } int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx) { - struct xdp_pkt *xdp_pkt; + struct xdp_frame *xdpf; - xdp_pkt = convert_to_xdp_pkt(xdp); - if (unlikely(!xdp_pkt)) + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) return -EOVERFLOW; /* Info needed when constructing SKB on remote CPU */ - xdp_pkt->dev_rx = dev_rx; + xdpf->dev_rx = dev_rx; - bq_enqueue(rcpu, xdp_pkt); + bq_enqueue(rcpu, xdpf); return 0; } diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index bf6da59ae0d0..a41343009ccc 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -150,8 +150,154 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return 0; } +struct map_iter { + void *key; + bool done; +}; + +static struct map_iter *map_iter(struct seq_file *m) +{ + return m->private; +} + +static struct bpf_map *seq_file_to_map(struct seq_file *m) +{ + return file_inode(m->file)->i_private; +} + +static void map_iter_free(struct map_iter *iter) +{ + if (iter) { + kfree(iter->key); + kfree(iter); + } +} + +static struct map_iter *map_iter_alloc(struct bpf_map *map) +{ + struct map_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); + if (!iter) + goto error; + + iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); + if (!iter->key) + goto error; + + return iter; + +error: + map_iter_free(iter); + return NULL; +} + +static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct bpf_map *map = seq_file_to_map(m); + void *key = map_iter(m)->key; + + if (map_iter(m)->done) + return NULL; + + if (unlikely(v == SEQ_START_TOKEN)) + goto done; + + if (map->ops->map_get_next_key(map, key, key)) { + map_iter(m)->done = true; + return NULL; + } + +done: + ++(*pos); + return key; +} + +static void *map_seq_start(struct seq_file *m, loff_t 
*pos) +{ + if (map_iter(m)->done) + return NULL; + + return *pos ? map_iter(m)->key : SEQ_START_TOKEN; +} + +static void map_seq_stop(struct seq_file *m, void *v) +{ +} + +static int map_seq_show(struct seq_file *m, void *v) +{ + struct bpf_map *map = seq_file_to_map(m); + void *key = map_iter(m)->key; + + if (unlikely(v == SEQ_START_TOKEN)) { + seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); + seq_puts(m, "# WARNING!! The output format will change\n"); + } else { + map->ops->map_seq_show_elem(map, key, m); + } + + return 0; +} + +static const struct seq_operations bpffs_map_seq_ops = { + .start = map_seq_start, + .next = map_seq_next, + .show = map_seq_show, + .stop = map_seq_stop, +}; + +static int bpffs_map_open(struct inode *inode, struct file *file) +{ + struct bpf_map *map = inode->i_private; + struct map_iter *iter; + struct seq_file *m; + int err; + + iter = map_iter_alloc(map); + if (!iter) + return -ENOMEM; + + err = seq_open(file, &bpffs_map_seq_ops); + if (err) { + map_iter_free(iter); + return err; + } + + m = file->private_data; + m->private = iter; + + return 0; +} + +static int bpffs_map_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + + map_iter_free(map_iter(m)); + + return seq_release(inode, file); +} + +/* bpffs_map_fops should only implement the basic + * read operation for a BPF map. The purpose is to + * provide a simple, intuitive way for users to do + * "cat bpffs/pathto/a-pinned-map". + * + * Other operations (e.g. write, lookup...) should be realized by + * the userspace tools (e.g. bpftool) through the + * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update + * interface. + */ +static const struct file_operations bpffs_map_fops = { + .open = bpffs_map_open, + .read = seq_read, + .release = bpffs_map_release, +}; + static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, - const struct inode_operations *iops) + const struct inode_operations *iops, + const struct file_operations *fops) { struct inode *dir = dentry->d_parent->d_inode; struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); @@ -159,6 +305,7 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, return PTR_ERR(inode); inode->i_op = iops; + inode->i_fop = fops; inode->i_private = raw; bpf_dentry_finalize(dentry, inode, dir); @@ -167,12 +314,15 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) { - return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops); + return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, NULL); } static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) { - return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops); + struct bpf_map *map = arg; + + return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, + map->btf ? 
&bpffs_map_fops : NULL); } static struct dentry * diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ebfe9f29dae8..0bd2944eafb9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -11,6 +11,7 @@ */ #include <linux/bpf.h> #include <linux/bpf_trace.h> +#include <linux/btf.h> #include <linux/syscalls.h> #include <linux/slab.h> #include <linux/sched/signal.h> @@ -26,6 +27,7 @@ #include <linux/cred.h> #include <linux/timekeeping.h> #include <linux/ctype.h> +#include <linux/btf.h> #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ @@ -250,6 +252,7 @@ static void bpf_map_free_deferred(struct work_struct *work) bpf_map_uncharge_memlock(map); security_bpf_map_free(map); + btf_put(map->btf); /* implementation dependent freeing */ map->ops->map_free(map); } @@ -415,7 +418,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src) return 0; } -#define BPF_MAP_CREATE_LAST_FIELD map_ifindex +#define BPF_MAP_CREATE_LAST_FIELD btf_value_id /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -449,6 +452,33 @@ static int map_create(union bpf_attr *attr) atomic_set(&map->refcnt, 1); atomic_set(&map->usercnt, 1); + if (bpf_map_support_seq_show(map) && + (attr->btf_key_id || attr->btf_value_id)) { + struct btf *btf; + + if (!attr->btf_key_id || !attr->btf_value_id) { + err = -EINVAL; + goto free_map_nouncharge; + } + + btf = btf_get_by_fd(attr->btf_fd); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); + goto free_map_nouncharge; + } + + err = map->ops->map_check_btf(map, btf, attr->btf_key_id, + attr->btf_value_id); + if (err) { + btf_put(btf); + goto free_map_nouncharge; + } + + map->btf = btf; + map->btf_key_id = attr->btf_key_id; + map->btf_value_id = attr->btf_value_id; + } + err = security_bpf_map_alloc(map); if (err) goto free_map_nouncharge; @@ -481,6 +511,7 @@ free_map: free_map_sec: security_bpf_map_free(map); free_map_nouncharge: + btf_put(map->btf); map->ops->map_free(map); return err; } @@ -1883,6 +1914,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info.load_time = prog->aux->load_time; info.created_by_uid = from_kuid_munged(current_user_ns(), prog->aux->user->uid); + info.gpl_compatible = prog->gpl_compatible; memcpy(info.tag, prog->tag, sizeof(prog->tag)); memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); @@ -2016,6 +2048,8 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, else if (f.file->f_op == &bpf_map_fops) err = bpf_map_get_info_by_fd(f.file->private_data, attr, uattr); + else if (f.file->f_op == &btf_fops) + err = btf_get_info_by_fd(f.file->private_data, attr, uattr); else err = -EINVAL; @@ -2023,6 +2057,19 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } +#define BPF_BTF_LOAD_LAST_FIELD btf_log_level + +static int bpf_btf_load(const union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_BTF_LOAD)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return btf_new_fd(attr); +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -2103,6 +2150,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_RAW_TRACEPOINT_OPEN: err = bpf_raw_tracepoint_open(&attr); break; + case BPF_BTF_LOAD: + err = bpf_btf_load(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5dd1dcb902bf..eb1a596aebd3 100644 --- a/kernel/bpf/verifier.c +++ 
b/kernel/bpf/verifier.c @@ -1914,7 +1914,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { expected_type = PTR_TO_STACK; - if (!type_is_pkt_pointer(type) && + if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && type != expected_type) goto err_type; } else if (arg_type == ARG_CONST_SIZE || @@ -1966,14 +1966,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, verbose(env, "invalid map_ptr to access map->key\n"); return -EACCES; } - if (type_is_pkt_pointer(type)) - err = check_packet_access(env, regno, reg->off, - meta->map_ptr->key_size, - false); - else - err = check_stack_boundary(env, regno, - meta->map_ptr->key_size, - false, NULL); + err = check_helper_mem_access(env, regno, + meta->map_ptr->key_size, false, + NULL); } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity @@ -1983,14 +1978,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, verbose(env, "invalid map_ptr to access map->value\n"); return -EACCES; } - if (type_is_pkt_pointer(type)) - err = check_packet_access(env, regno, reg->off, - meta->map_ptr->value_size, - false); - else - err = check_stack_boundary(env, regno, - meta->map_ptr->value_size, - false, NULL); + err = check_helper_mem_access(env, regno, + meta->map_ptr->value_size, false, + NULL); } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 15ea216a67ce..63d0816ab23b 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -22,6 +22,7 @@ #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> +#include <linux/uidgid.h> #include <linux/uuid.h> #include <linux/ctype.h> #include <net/sock.h> @@ -231,30 +232,6 @@ out: return r; } -#ifdef CONFIG_NET -static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) -{ - struct kobject *kobj = data, *ksobj; - const struct kobj_ns_type_operations *ops; - - ops = kobj_ns_ops(kobj); - if (!ops && kobj->kset) { - ksobj = &kobj->kset->kobj; - if (ksobj->parent != NULL) - ops = kobj_ns_ops(ksobj->parent); - } - - if (ops && ops->netlink_ns && kobj->ktype->namespace) { - const void *sock_ns, *ns; - ns = kobj->ktype->namespace(kobj); - sock_ns = ops->netlink_ns(dsk); - return sock_ns != ns; - } - - return 0; -} -#endif - #ifdef CONFIG_UEVENT_HELPER static int kobj_usermode_filter(struct kobject *kobj) { @@ -296,15 +273,44 @@ static void cleanup_uevent_env(struct subprocess_info *info) } #endif -static int kobject_uevent_net_broadcast(struct kobject *kobj, - struct kobj_uevent_env *env, +#ifdef CONFIG_NET +static struct sk_buff *alloc_uevent_skb(struct kobj_uevent_env *env, const char *action_string, const char *devpath) { - int retval = 0; -#if defined(CONFIG_NET) + struct netlink_skb_parms *parms; + struct sk_buff *skb = NULL; + char *scratch; + size_t len; + + /* allocate message with maximum possible size */ + len = strlen(action_string) + strlen(devpath) + 2; + skb = alloc_skb(len + env->buflen, GFP_KERNEL); + if (!skb) + return NULL; + + /* add header */ + scratch = skb_put(skb, len); + sprintf(scratch, "%s@%s", action_string, devpath); + + skb_put_data(skb, env->buf, env->buflen); + + parms = &NETLINK_CB(skb); + parms->creds.uid = GLOBAL_ROOT_UID; + parms->creds.gid = GLOBAL_ROOT_GID; + parms->dst_group = 1; + parms->portid = 0; + + 
return skb; +} + +static int uevent_net_broadcast_untagged(struct kobj_uevent_env *env, + const char *action_string, + const char *devpath) +{ struct sk_buff *skb = NULL; struct uevent_sock *ue_sk; + int retval = 0; /* send netlink message */ list_for_each_entry(ue_sk, &uevent_sock_list, list) { @@ -314,37 +320,99 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, continue; if (!skb) { - /* allocate message with the maximum possible size */ - size_t len = strlen(action_string) + strlen(devpath) + 2; - char *scratch; - retval = -ENOMEM; - skb = alloc_skb(len + env->buflen, GFP_KERNEL); + skb = alloc_uevent_skb(env, action_string, devpath); if (!skb) continue; - - /* add header */ - scratch = skb_put(skb, len); - sprintf(scratch, "%s@%s", action_string, devpath); - - skb_put_data(skb, env->buf, env->buflen); - - NETLINK_CB(skb).dst_group = 1; } - retval = netlink_broadcast_filtered(uevent_sock, skb_get(skb), - 0, 1, GFP_KERNEL, - kobj_bcast_filter, - kobj); + retval = netlink_broadcast(uevent_sock, skb_get(skb), 0, 1, + GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (retval == -ENOBUFS || retval == -ESRCH) retval = 0; } consume_skb(skb); -#endif + return retval; } +static int uevent_net_broadcast_tagged(struct sock *usk, + struct kobj_uevent_env *env, + const char *action_string, + const char *devpath) +{ + struct user_namespace *owning_user_ns = sock_net(usk)->user_ns; + struct sk_buff *skb = NULL; + int ret = 0; + + skb = alloc_uevent_skb(env, action_string, devpath); + if (!skb) + return -ENOMEM; + + /* fix credentials */ + if (owning_user_ns != &init_user_ns) { + struct netlink_skb_parms *parms = &NETLINK_CB(skb); + kuid_t root_uid; + kgid_t root_gid; + + /* fix uid */ + root_uid = make_kuid(owning_user_ns, 0); + if (uid_valid(root_uid)) + parms->creds.uid = root_uid; + + /* fix gid */ + root_gid = make_kgid(owning_user_ns, 0); + if (gid_valid(root_gid)) + parms->creds.gid = root_gid; + } + + ret = netlink_broadcast(usk, skb, 0, 1, GFP_KERNEL); + /* ENOBUFS should be handled in userspace */ + if (ret == -ENOBUFS || ret == -ESRCH) + ret = 0; + + return ret; +} +#endif + +static int kobject_uevent_net_broadcast(struct kobject *kobj, + struct kobj_uevent_env *env, + const char *action_string, + const char *devpath) +{ + int ret = 0; + +#ifdef CONFIG_NET + const struct kobj_ns_type_operations *ops; + const struct net *net = NULL; + + ops = kobj_ns_ops(kobj); + if (!ops && kobj->kset) { + struct kobject *ksobj = &kobj->kset->kobj; + if (ksobj->parent != NULL) + ops = kobj_ns_ops(ksobj->parent); + } + + /* kobjects currently only carry network namespace tags and they + * are the only tag relevant here since we want to decide which + * network namespaces to broadcast the uevent into. + */ + if (ops && ops->netlink_ns && kobj->ktype->namespace) + if (ops->type == KOBJ_NS_TYPE_NET) + net = kobj->ktype->namespace(kobj); + + if (!net) + ret = uevent_net_broadcast_untagged(env, action_string, + devpath); + else + ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env, + action_string, devpath); +#endif + + return ret; +} + static void zap_modalias_env(struct kobj_uevent_env *env) { static const char modalias_prefix[] = "MODALIAS="; @@ -703,9 +771,13 @@ static int uevent_net_init(struct net *net) net->uevent_sock = ue_sk; - mutex_lock(&uevent_sock_mutex); - list_add_tail(&ue_sk->list, &uevent_sock_list); - mutex_unlock(&uevent_sock_mutex); + /* Restrict uevents to initial user namespace. 
*/ + if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { + mutex_lock(&uevent_sock_mutex); + list_add_tail(&ue_sk->list, &uevent_sock_list); + mutex_unlock(&uevent_sock_mutex); + } + return 0; } @@ -713,9 +785,11 @@ static void uevent_net_exit(struct net *net) { struct uevent_sock *ue_sk = net->uevent_sock; - mutex_lock(&uevent_sock_mutex); - list_del(&ue_sk->list); - mutex_unlock(&uevent_sock_mutex); + if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { + mutex_lock(&uevent_sock_mutex); + list_del(&ue_sk->list); + mutex_unlock(&uevent_sock_mutex); + } netlink_kernel_release(ue_sk->sk); kfree(ue_sk); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 2b2b79974b61..9427b5766134 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -668,8 +668,9 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow); * For a completely stable walk you should construct your own data * structure outside the hash table. * - * This function may sleep so you must not call it from interrupt - * context or with spin locks held. + * This function may be called from any process context, including + * non-preemptable context, but cannot be called from softirq or + * hardirq context. * * You must call rhashtable_walk_exit after this function returns. */ @@ -726,6 +727,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU) { struct rhashtable *ht = iter->ht; + bool rhlist = ht->rhlist; rcu_read_lock(); @@ -734,11 +736,52 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) list_del(&iter->walker.list); spin_unlock(&ht->lock); - if (!iter->walker.tbl && !iter->end_of_table) { + if (iter->end_of_table) + return 0; + if (!iter->walker.tbl) { + iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); + iter->slot = 0; + iter->skip = 0; return -EAGAIN; } + if (iter->p && !rhlist) { + /* + * We need to validate that 'p' is still in the table, and + * if so, update 'skip' + */ + struct rhash_head *p; + int skip = 0; + rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { + skip++; + if (p == iter->p) { + iter->skip = skip; + goto found; + } + } + iter->p = NULL; + } else if (iter->p && rhlist) { + /* Need to validate that 'list' is still in the table, and + * if so, update 'skip' and 'p'. + */ + struct rhash_head *p; + struct rhlist_head *list; + int skip = 0; + rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { + for (list = container_of(p, struct rhlist_head, rhead); + list; + list = rcu_dereference(list->next)) { + skip++; + if (list == iter->list) { + iter->p = p; + iter->skip = skip; + goto found; + } + } + } + iter->p = NULL; + } +found: return 0; } EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); @@ -914,8 +957,6 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) iter->walker.tbl = NULL; spin_unlock(&ht->lock); - iter->p = NULL; - out: rcu_read_unlock(); } diff --git a/net/Kconfig b/net/Kconfig index 0428f12c25c2..b62089fb1332 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -407,6 +407,9 @@ config GRO_CELLS bool default n +config SOCK_VALIDATE_XMIT + bool + config NET_DEVLINK tristate "Network physical/parent device Netlink interface" help @@ -423,6 +426,9 @@ config MAY_USE_DEVLINK on MAY_USE_DEVLINK to ensure they do not cause link errors when devlink is a loadable module and the driver using it is built-in. +config PAGE_POOL + bool + endif # if NET # Used by archs to tell that they support BPF JIT compiler plus which flavour. 
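
[Editorial note] The lib/rhashtable.c hunk above revises the walker's calling contract: rhashtable_walk_enter() may now be called from any process context (but not from softirq/hardirq), and by keeping iter->p across rhashtable_walk_stop() an interrupted walk resumes from the last returned object instead of restarting the bucket chain. A minimal sketch of a walk that follows this contract, assuming a hypothetical struct my_obj embedded in an already-initialized table:

	/* Sketch only: my_obj and walk_table() are hypothetical; the
	 * enter/start/next/stop/exit sequence follows the contract
	 * described in the updated comments above.
	 */
	#include <linux/rhashtable.h>

	struct my_obj {
		u32 key;
		struct rhash_head node;		/* hash table linkage */
	};

	static void walk_table(struct rhashtable *ht)
	{
		struct rhashtable_iter iter;
		struct my_obj *obj;

		rhashtable_walk_enter(ht, &iter);	/* process context only */
		do {
			rhashtable_walk_start(&iter);	/* takes rcu_read_lock() */

			while ((obj = rhashtable_walk_next(&iter)) && !IS_ERR(obj))
				pr_info("visited key=%u\n", obj->key);

			rhashtable_walk_stop(&iter);	/* drops rcu_read_lock() */
			/* -EAGAIN: a resize ran; restart, duplicates possible */
		} while (obj == ERR_PTR(-EAGAIN));
		rhashtable_walk_exit(&iter);
	}

With the revalidation added to rhashtable_walk_start_check(), a stop/start cycle in the middle of such a loop now recomputes iter->skip from the surviving entry rather than resetting the position.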
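
[Editorial note] The BTF verification path added in kernel/bpf/btf.c is reached from userspace through the new BPF_BTF_LOAD command wired up in kernel/bpf/syscall.c (CAP_SYS_ADMIN required). A sketch of the caller side, using only the union bpf_attr fields that btf_new_fd()/btf_parse() consume in this patch; the raw syscall(2) wrapper and the btf_data/btf_size inputs are assumptions, since no userspace helper is part of the patch:

	/* Sketch only: loads a caller-supplied raw BTF blob via BPF_BTF_LOAD. */
	#include <linux/bpf.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>
	#include <stdio.h>

	static int btf_load(const void *btf_data, unsigned int btf_size)
	{
		char log[4096];	/* btf_parse() requires 128 <= size <= UINT_MAX >> 8 */
		union bpf_attr attr;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.btf = (__u64)(unsigned long)btf_data;
		attr.btf_size = btf_size;
		attr.btf_log_buf = (__u64)(unsigned long)log;
		attr.btf_log_size = sizeof(log);
		attr.btf_log_level = 1;	/* request the verification trace */

		/* On success: an anon-inode fd wrapping the verified BTF object,
		 * usable as the map-create attr.btf_fd together with
		 * attr.btf_key_id/attr.btf_value_id.
		 */
		fd = syscall(__NR_bpf, BPF_BTF_LOAD, &attr, sizeof(attr));
		if (fd < 0)
			fprintf(stderr, "BTF rejected:\n%s\n", log);
		return fd;
	}

Once a map is created with a BTF fd and key/value type IDs, the bpffs_map_fops added in kernel/bpf/inode.c lets a plain "cat" of the pinned map pretty-print its contents through btf_type_seq_show().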
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 2ced48662c1f..68c3578343b4 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -170,7 +170,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, xdp.rxq = &rxqueue->xdp_rxq; retval = bpf_test_run(prog, &xdp, repeat, &duration); - if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN) + if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN || + xdp.data_end != xdp.data + size) size = xdp.data_end - xdp.data; ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); kfree(data); diff --git a/net/bridge/br.c b/net/bridge/br.c index 671d13c10f6f..b0a0b82e2d91 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; + bool notified = false; bool changed_addr; int err; @@ -67,7 +68,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; case NETDEV_CHANGE: - br_port_carrier_check(p); + br_port_carrier_check(p, ¬ified); break; case NETDEV_FEAT_CHANGE: @@ -76,8 +77,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v case NETDEV_DOWN: spin_lock_bh(&br->lock); - if (br->dev->flags & IFF_UP) + if (br->dev->flags & IFF_UP) { br_stp_disable_port(p); + notified = true; + } spin_unlock_bh(&br->lock); break; @@ -85,6 +88,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v if (netif_running(br->dev) && netif_oper_up(dev)) { spin_lock_bh(&br->lock); br_stp_enable_port(p); + notified = true; spin_unlock_bh(&br->lock); } break; @@ -110,8 +114,8 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v } /* Events that may cause spanning tree to refresh */ - if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || - event == NETDEV_CHANGE || event == NETDEV_DOWN) + if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP || + event == NETDEV_CHANGE || event == NETDEV_DOWN)) br_ifinfo_notify(RTM_NEWLINK, NULL, p); return NOTIFY_DONE; @@ -141,7 +145,7 @@ static int br_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, - fdb_info->vid); + fdb_info->vid, false); if (err) { err = notifier_from_errno(err); break; @@ -152,7 +156,7 @@ static int br_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_DEL_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_del(br, p, fdb_info->addr, - fdb_info->vid); + fdb_info->vid, false); if (err) err = notifier_from_errno(err); break; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d9e69e4514be..b19e3104afd6 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -40,7 +40,7 @@ static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, - const struct net_bridge_fdb_entry *, int); + const struct net_bridge_fdb_entry *, int, bool); int __init br_fdb_init(void) { @@ -121,6 +121,28 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, return fdb; } +struct net_device *br_fdb_find_port(const struct net_device *br_dev, + const unsigned char *addr, + __u16 vid) +{ + struct net_bridge_fdb_entry *f; + struct net_device *dev = NULL; + 
struct net_bridge *br; + + ASSERT_RTNL(); + + if (!netif_is_bridge_master(br_dev)) + return NULL; + + br = netdev_priv(br_dev); + f = br_fdb_find(br, addr, vid); + if (f && f->dst) + dev = f->dst->dev; + + return dev; +} +EXPORT_SYMBOL_GPL(br_fdb_find_port); + struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, const unsigned char *addr, __u16 vid) @@ -173,7 +195,8 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr) } } -static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) +static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f, + bool swdev_notify) { trace_fdb_delete(br, f); @@ -183,7 +206,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) hlist_del_init_rcu(&f->fdb_node); rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode, br_fdb_rht_params); - fdb_notify(br, f, RTM_DELNEIGH); + fdb_notify(br, f, RTM_DELNEIGH, swdev_notify); call_rcu(&f->rcu, fdb_rcu_free); } @@ -219,7 +242,7 @@ static void fdb_delete_local(struct net_bridge *br, return; } - fdb_delete(br, f); + fdb_delete(br, f, true); } void br_fdb_find_delete_local(struct net_bridge *br, @@ -334,7 +357,7 @@ void br_fdb_cleanup(struct work_struct *work) } else { spin_lock_bh(&br->hash_lock); if (!hlist_unhashed(&f->fdb_node)) - fdb_delete(br, f); + fdb_delete(br, f, true); spin_unlock_bh(&br->hash_lock); } } @@ -354,7 +377,7 @@ void br_fdb_flush(struct net_bridge *br) spin_lock_bh(&br->hash_lock); hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { if (!f->is_static) - fdb_delete(br, f); + fdb_delete(br, f, true); } spin_unlock_bh(&br->hash_lock); } @@ -383,7 +406,7 @@ void br_fdb_delete_by_port(struct net_bridge *br, if (f->is_local) fdb_delete_local(br, p, f); else - fdb_delete(br, f); + fdb_delete(br, f, true); } spin_unlock_bh(&br->hash_lock); } @@ -509,7 +532,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return 0; br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n", source ? 
source->dev->name : br->dev->name, addr, vid); - fdb_delete(br, fdb); + fdb_delete(br, fdb, true); } fdb = fdb_create(br, source, addr, vid, 1, 1); @@ -517,7 +540,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return -ENOMEM; fdb_add_hw_addr(br, addr); - fdb_notify(br, fdb, RTM_NEWNEIGH); + fdb_notify(br, fdb, RTM_NEWNEIGH, true); return 0; } @@ -572,7 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb->added_by_user = 1; if (unlikely(fdb_modified)) { trace_br_fdb_update(br, source, addr, vid, added_by_user); - fdb_notify(br, fdb, RTM_NEWNEIGH); + fdb_notify(br, fdb, RTM_NEWNEIGH, true); } } } else { @@ -583,7 +606,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb->added_by_user = 1; trace_br_fdb_update(br, source, addr, vid, added_by_user); - fdb_notify(br, fdb, RTM_NEWNEIGH); + fdb_notify(br, fdb, RTM_NEWNEIGH, true); } /* else we lose race and someone else inserts * it first, don't bother updating @@ -665,13 +688,15 @@ static inline size_t fdb_nlmsg_size(void) } static void fdb_notify(struct net_bridge *br, - const struct net_bridge_fdb_entry *fdb, int type) + const struct net_bridge_fdb_entry *fdb, int type, + bool swdev_notify) { struct net *net = dev_net(br->dev); struct sk_buff *skb; int err = -ENOBUFS; - br_switchdev_fdb_notify(fdb, type); + if (swdev_notify) + br_switchdev_fdb_notify(fdb, type); skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) @@ -810,7 +835,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, fdb->used = jiffies; if (modified) { fdb->updated = jiffies; - fdb_notify(br, fdb, RTM_NEWNEIGH); + fdb_notify(br, fdb, RTM_NEWNEIGH, true); } return 0; @@ -834,7 +859,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, rcu_read_unlock(); local_bh_enable(); } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { - err = br_fdb_external_learn_add(br, p, addr, vid); + err = br_fdb_external_learn_add(br, p, addr, vid, true); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm->ndm_state, @@ -923,7 +948,7 @@ static int fdb_delete_by_addr_and_port(struct net_bridge *br, if (!fdb || fdb->dst != p) return -ENOENT; - fdb_delete(br, fdb); + fdb_delete(br, fdb, true); return 0; } @@ -1043,7 +1068,8 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + bool swdev_notify) { struct net_bridge_fdb_entry *fdb; bool modified = false; @@ -1061,7 +1087,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, goto err_unlock; } fdb->added_by_external_learn = 1; - fdb_notify(br, fdb, RTM_NEWNEIGH); + fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } else { fdb->updated = jiffies; @@ -1080,7 +1106,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, } if (modified) - fdb_notify(br, fdb, RTM_NEWNEIGH); + fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } err_unlock: @@ -1090,7 +1116,8 @@ err_unlock: } int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, + bool swdev_notify) { struct net_bridge_fdb_entry *fdb; int err = 0; @@ -1099,7 +1126,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, fdb = br_fdb_find(br, addr, vid); if (fdb && 
fdb->added_by_external_learn) - fdb_delete(br, fdb); + fdb_delete(br, fdb, swdev_notify); else err = -ENOENT; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index b4eed113d2ec..7a7fd672ccf2 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -274,8 +274,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct net_bridge_port *port, *lport, *rport; lport = p ? p->port : NULL; - rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) : - NULL; + rport = hlist_entry_safe(rp, struct net_bridge_port, rlist); if ((unsigned long)lport > (unsigned long)rport) { port = lport; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 5bb6681fa91e..05e42d86882d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -64,7 +64,7 @@ static int port_cost(struct net_device *dev) /* Check for port carrier transitions. */ -void br_port_carrier_check(struct net_bridge_port *p) +void br_port_carrier_check(struct net_bridge_port *p, bool *notified) { struct net_device *dev = p->dev; struct net_bridge *br = p->br; @@ -73,16 +73,21 @@ void br_port_carrier_check(struct net_bridge_port *p) netif_running(dev) && netif_oper_up(dev)) p->path_cost = port_cost(dev); + *notified = false; if (!netif_running(br->dev)) return; spin_lock_bh(&br->lock); if (netif_running(dev) && netif_oper_up(dev)) { - if (p->state == BR_STATE_DISABLED) + if (p->state == BR_STATE_DISABLED) { br_stp_enable_port(p); + *notified = true; + } } else { - if (p->state != BR_STATE_DISABLED) + if (p->state != BR_STATE_DISABLED) { br_stp_disable_port(p); + *notified = true; + } } spin_unlock_bh(&br->lock); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a7cb3ece5031..742f40aefdaf 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -553,9 +553,11 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + bool swdev_notify); int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, + bool swdev_notify); void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); @@ -573,7 +575,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, enum br_pkt_type pkt_type, bool local_rcv, bool local_orig); /* br_if.c */ -void br_port_carrier_check(struct net_bridge_port *p); +void br_port_carrier_check(struct net_bridge_port *p, bool *notified); int br_add_bridge(struct net *net, const char *name); int br_del_bridge(struct net *net, const char *name); int br_add_if(struct net_bridge *br, struct net_device *dev, @@ -594,11 +596,22 @@ static inline bool br_rx_handler_check_rcu(const struct net_device *dev) return rcu_dereference(dev->rx_handler) == br_handle_frame; } +static inline bool br_rx_handler_check_rtnl(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->rx_handler) == br_handle_frame; +} + static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) { return br_rx_handler_check_rcu(dev) ? 
br_port_get_rcu(dev) : NULL; } +static inline struct net_bridge_port * +br_port_get_check_rtnl(const struct net_device *dev) +{ + return br_rx_handler_check_rtnl(dev) ? br_port_get_rtnl_rcu(dev) : NULL; +} + /* br_ioctl.c */ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index ee775f4ff76c..35474d49555d 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -102,13 +102,15 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, static void br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, - u16 vid, struct net_device *dev) + u16 vid, struct net_device *dev, + bool added_by_user) { struct switchdev_notifier_fdb_info info; unsigned long notifier_type; info.addr = mac; info.vid = vid; + info.added_by_user = added_by_user; notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; call_switchdev_notifiers(notifier_type, dev, &info.info); } @@ -116,19 +118,21 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) { - if (!fdb->added_by_user || !fdb->dst) + if (!fdb->dst) return; switch (type) { case RTM_DELNEIGH: br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr, fdb->key.vlan_id, - fdb->dst->dev); + fdb->dst->dev, + fdb->added_by_user); break; case RTM_NEWNEIGH: br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr, fdb->key.vlan_id, - fdb->dst->dev); + fdb->dst->dev, + fdb->added_by_user); break; } } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9896f4975353..df37a5137c25 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1149,3 +1149,42 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, stats->tx_packets += txpackets; } } + +int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) +{ + struct net_bridge_vlan_group *vg; + + ASSERT_RTNL(); + if (netif_is_bridge_master(dev)) + vg = br_vlan_group(netdev_priv(dev)); + else + return -EINVAL; + + *p_pvid = br_get_pvid(vg); + return 0; +} +EXPORT_SYMBOL_GPL(br_vlan_get_pvid); + +int br_vlan_get_info(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge_port *p; + + ASSERT_RTNL(); + p = br_port_get_check_rtnl(dev); + if (p) + vg = nbp_vlan_group(p); + else + return -EINVAL; + + v = br_vlan_find(vg, vid); + if (!v) + return -ENOENT; + + p_vinfo->vid = vid; + p_vinfo->flags = v->flags; + return 0; +} +EXPORT_SYMBOL_GPL(br_vlan_get_info); diff --git a/net/core/Makefile b/net/core/Makefile index 6dbbba8c57ae..7080417f8bc8 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -14,6 +14,7 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ fib_notifier.o xdp.o obj-y += net-sysfs.o +obj-$(CONFIG_PAGE_POOL) += page_pool.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o diff --git a/net/core/dev.c b/net/core/dev.c index af0558b00c6c..bb81a6e1d354 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1285,6 +1285,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return len; } +EXPORT_SYMBOL(dev_set_alias); /** * dev_get_alias - get ifalias of a device @@ -1586,7 +1587,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) 
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) - }; + } #undef N return "UNKNOWN_NETDEV_EVENT"; } @@ -2614,17 +2615,16 @@ EXPORT_SYMBOL(netif_device_attach); * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. */ -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues) +static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb) { u32 hash; u16 qoffset = 0; - u16 qcount = num_tx_queues; + u16 qcount = dev->real_num_tx_queues; if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); - while (unlikely(hash >= num_tx_queues)) - hash -= num_tx_queues; + while (unlikely(hash >= qcount)) + hash -= qcount; return hash; } @@ -2637,7 +2637,6 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; } -EXPORT_SYMBOL(__skb_tx_hash); static void skb_warn_bad_offload(const struct sk_buff *skb) { @@ -3113,6 +3112,10 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (unlikely(!skb)) goto out_null; + skb = sk_validate_xmit_skb(skb, dev); + if (unlikely(!skb)) + goto out_null; + if (netif_needs_gso(skb, features)) { struct sk_buff *segs; @@ -3996,9 +3999,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct bpf_prog *xdp_prog) { struct netdev_rx_queue *rxqueue; + void *orig_data, *orig_data_end; u32 metalen, act = XDP_DROP; struct xdp_buff xdp; - void *orig_data; int hlen, off; u32 mac_len; @@ -4037,6 +4040,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, xdp.data_meta = xdp.data; xdp.data_end = xdp.data + hlen; xdp.data_hard_start = skb->data - skb_headroom(skb); + orig_data_end = xdp.data_end; orig_data = xdp.data; rxqueue = netif_get_rxqueue(skb); @@ -4051,6 +4055,15 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, __skb_push(skb, -off); skb->mac_header += off; + /* check if bpf_xdp_adjust_tail was used. it can only "shrink" + * pckt. 
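+	 * For example (illustrative numbers, not from this patch): if
+	 * the program ran bpf_xdp_adjust_tail(ctx, -4) on a 64 byte
+	 * frame, xdp.data_end now sits 4 bytes before orig_data_end,
+	 * "off" below evaluates to 4, the skb tail pointer is reset to
+	 * xdp.data_end - xdp.data (60 bytes of payload), and skb->len
+	 * shrinks by the same 4 bytes.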
+ */ + off = orig_data_end - xdp.data_end; + if (off != 0) { + skb_set_tail_pointer(skb, xdp.data_end - xdp.data); + skb->len -= off; + } + switch (act) { case XDP_REDIRECT: case XDP_TX: @@ -7870,6 +7883,8 @@ int register_netdevice(struct net_device *dev) int ret; struct net *net = dev_net(dev); + BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE < + NETDEV_FEATURE_COUNT); BUG_ON(dev_boot_phase); ASSERT_RTNL(); diff --git a/net/core/dst.c b/net/core/dst.c index 007aa0b08291..2d9b37f8944a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -58,6 +58,7 @@ const struct dst_metrics dst_default_metrics = { */ .refcnt = REFCOUNT_INIT(1), }; +EXPORT_SYMBOL(dst_default_metrics); void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index ba02f0dfe85c..c15075dc7572 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -92,6 +92,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", + [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", @@ -109,6 +110,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload", [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record", + [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload", }; static const char @@ -210,23 +212,6 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) return ret; } -static int phy_get_sset_count(struct phy_device *phydev) -{ - int ret; - - if (phydev->drv->get_sset_count && - phydev->drv->get_strings && - phydev->drv->get_stats) { - mutex_lock(&phydev->lock); - ret = phydev->drv->get_sset_count(phydev); - mutex_unlock(&phydev->lock); - - return ret; - } - - return -EOPNOTSUPP; -} - static int __ethtool_get_sset_count(struct net_device *dev, int sset) { const struct ethtool_ops *ops = dev->ethtool_ops; @@ -243,12 +228,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_PHY_TUNABLES) return ARRAY_SIZE(phy_tunable_strings); - if (sset == ETH_SS_PHY_STATS) { - if (dev->phydev) - return phy_get_sset_count(dev->phydev); - else - return -EOPNOTSUPP; - } + if (sset == ETH_SS_PHY_STATS && dev->phydev && + !ops->get_ethtool_phy_stats) + return phy_ethtool_get_sset_count(dev->phydev); if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); @@ -271,17 +253,10 @@ static void __ethtool_get_strings(struct net_device *dev, memcpy(data, tunable_strings, sizeof(tunable_strings)); else if (stringset == ETH_SS_PHY_TUNABLES) memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings)); - else if (stringset == ETH_SS_PHY_STATS) { - struct phy_device *phydev = dev->phydev; - - if (phydev) { - mutex_lock(&phydev->lock); - phydev->drv->get_strings(phydev, data); - mutex_unlock(&phydev->lock); - } else { - return; - } - } else + else if (stringset == ETH_SS_PHY_STATS && dev->phydev && + !ops->get_ethtool_phy_stats) + phy_ethtool_get_strings(dev->phydev, data); + else /* ops->get_strings is valid because checked earlier */ ops->get_strings(dev, stringset, data); } @@ -1998,15 +1973,19 @@ static int ethtool_get_stats(struct net_device *dev, void __user 
*useraddr) static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) { - struct ethtool_stats stats; + const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; + struct ethtool_stats stats; u64 *data; int ret, n_stats; - if (!phydev) + if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count)) return -EOPNOTSUPP; - n_stats = phy_get_sset_count(phydev); + if (dev->phydev && !ops->get_ethtool_phy_stats) + n_stats = phy_ethtool_get_sset_count(dev->phydev); + else + n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); if (n_stats < 0) return n_stats; if (n_stats > S32_MAX / sizeof(u64)) @@ -2021,9 +2000,13 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) if (n_stats && !data) return -ENOMEM; - mutex_lock(&phydev->lock); - phydev->drv->get_stats(phydev, &stats, data); - mutex_unlock(&phydev->lock); + if (dev->phydev && !ops->get_ethtool_phy_stats) { + ret = phy_ethtool_get_stats(dev->phydev, &stats, data); + if (ret < 0) + return ret; + } else { + ops->get_ethtool_phy_stats(dev, &stats, data); + } ret = -EFAULT; if (copy_to_user(useraddr, &stats, sizeof(stats))) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 33958f84c173..126ffc5bc630 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -387,247 +387,304 @@ unsigned int fib_rules_seq_read(struct net *net, int family) } EXPORT_SYMBOL_GPL(fib_rules_seq_read); -static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, - struct fib_rules_ops *ops) -{ - int err = -EINVAL; - - if (frh->src_len) - if (tb[FRA_SRC] == NULL || - frh->src_len > (ops->addr_size * 8) || - nla_len(tb[FRA_SRC]) != ops->addr_size) - goto errout; - - if (frh->dst_len) - if (tb[FRA_DST] == NULL || - frh->dst_len > (ops->addr_size * 8) || - nla_len(tb[FRA_DST]) != ops->addr_size) - goto errout; - - err = 0; -errout: - return err; -} - -static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, - struct nlattr **tb, struct fib_rule *rule) +static struct fib_rule *rule_find(struct fib_rules_ops *ops, + struct fib_rule_hdr *frh, + struct nlattr **tb, + struct fib_rule *rule, + bool user_priority) { struct fib_rule *r; list_for_each_entry(r, &ops->rules_list, list) { - if (r->action != rule->action) + if (rule->action && r->action != rule->action) continue; - if (r->table != rule->table) + if (rule->table && r->table != rule->table) continue; - if (r->pref != rule->pref) + if (user_priority && r->pref != rule->pref) continue; - if (memcmp(r->iifname, rule->iifname, IFNAMSIZ)) + if (rule->iifname[0] && + memcmp(r->iifname, rule->iifname, IFNAMSIZ)) continue; - if (memcmp(r->oifname, rule->oifname, IFNAMSIZ)) + if (rule->oifname[0] && + memcmp(r->oifname, rule->oifname, IFNAMSIZ)) continue; - if (r->mark != rule->mark) + if (rule->mark && r->mark != rule->mark) continue; - if (r->mark_mask != rule->mark_mask) + if (rule->mark_mask && r->mark_mask != rule->mark_mask) continue; - if (r->tun_id != rule->tun_id) + if (rule->tun_id && r->tun_id != rule->tun_id) continue; if (r->fr_net != rule->fr_net) continue; - if (r->l3mdev != rule->l3mdev) + if (rule->l3mdev && r->l3mdev != rule->l3mdev) continue; - if (!uid_eq(r->uid_range.start, rule->uid_range.start) || - !uid_eq(r->uid_range.end, rule->uid_range.end)) + if (uid_range_set(&rule->uid_range) && + (!uid_eq(r->uid_range.start, rule->uid_range.start) || + !uid_eq(r->uid_range.end, rule->uid_range.end))) continue; - if (r->ip_proto != rule->ip_proto) + if (rule->ip_proto && 
r->ip_proto != rule->ip_proto) continue; - if (!fib_rule_port_range_compare(&r->sport_range, + if (fib_rule_port_range_set(&rule->sport_range) && + !fib_rule_port_range_compare(&r->sport_range, &rule->sport_range)) continue; - if (!fib_rule_port_range_compare(&r->dport_range, + if (fib_rule_port_range_set(&rule->dport_range) && + !fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; if (!ops->compare(r, frh, tb)) continue; - return 1; + return r; + } + + return NULL; +} + +#ifdef CONFIG_NET_L3_MASTER_DEV +static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, + struct netlink_ext_ack *extack) +{ + nlrule->l3mdev = nla_get_u8(nla); + if (nlrule->l3mdev != 1) { + NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute"); + return -1; } + return 0; } +#else +static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel"); + return -1; +} +#endif -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct fib_rules_ops *ops, + struct nlattr *tb[], + struct fib_rule **rule, + bool *user_priority) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); - struct fib_rules_ops *ops = NULL; - struct fib_rule *rule, *r, *last = NULL; - struct nlattr *tb[FRA_MAX+1]; - int err = -EINVAL, unresolved = 0; - - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) - goto errout; + struct fib_rule *nlrule = NULL; + int err = -EINVAL; - ops = lookup_rules_ops(net, frh->family); - if (ops == NULL) { - err = -EAFNOSUPPORT; - goto errout; + if (frh->src_len) + if (!tb[FRA_SRC] || + frh->src_len > (ops->addr_size * 8) || + nla_len(tb[FRA_SRC]) != ops->addr_size) { + NL_SET_ERR_MSG(extack, "Invalid source address"); + goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); - if (err < 0) - goto errout; - - err = validate_rulemsg(frh, tb, ops); - if (err < 0) - goto errout; + if (frh->dst_len) + if (!tb[FRA_DST] || + frh->dst_len > (ops->addr_size * 8) || + nla_len(tb[FRA_DST]) != ops->addr_size) { + NL_SET_ERR_MSG(extack, "Invalid dst address"); + goto errout; + } - rule = kzalloc(ops->rule_size, GFP_KERNEL); - if (rule == NULL) { + nlrule = kzalloc(ops->rule_size, GFP_KERNEL); + if (!nlrule) { err = -ENOMEM; goto errout; } - refcount_set(&rule->refcnt, 1); - rule->fr_net = net; + refcount_set(&nlrule->refcnt, 1); + nlrule->fr_net = net; - rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY]) - : fib_default_rule_pref(ops); + if (tb[FRA_PRIORITY]) { + nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]); + *user_priority = true; + } else { + nlrule->pref = fib_default_rule_pref(ops); + } - rule->proto = tb[FRA_PROTOCOL] ? + nlrule->proto = tb[FRA_PROTOCOL] ? 
nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC; if (tb[FRA_IIFNAME]) { struct net_device *dev; - rule->iifindex = -1; - nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, rule->iifname); + nlrule->iifindex = -1; + nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, nlrule->iifname); if (dev) - rule->iifindex = dev->ifindex; + nlrule->iifindex = dev->ifindex; } if (tb[FRA_OIFNAME]) { struct net_device *dev; - rule->oifindex = -1; - nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, rule->oifname); + nlrule->oifindex = -1; + nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, nlrule->oifname); if (dev) - rule->oifindex = dev->ifindex; + nlrule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { - rule->mark = nla_get_u32(tb[FRA_FWMARK]); - if (rule->mark) + nlrule->mark = nla_get_u32(tb[FRA_FWMARK]); + if (nlrule->mark) /* compatibility: if the mark value is non-zero all bits * are compared unless a mask is explicitly specified. */ - rule->mark_mask = 0xFFFFFFFF; + nlrule->mark_mask = 0xFFFFFFFF; } if (tb[FRA_FWMASK]) - rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); + nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); if (tb[FRA_TUN_ID]) - rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); + nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); err = -EINVAL; - if (tb[FRA_L3MDEV]) { -#ifdef CONFIG_NET_L3_MASTER_DEV - rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]); - if (rule->l3mdev != 1) -#endif - goto errout_free; - } + if (tb[FRA_L3MDEV] && + fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0) + goto errout_free; - rule->action = frh->action; - rule->flags = frh->flags; - rule->table = frh_get_table(frh, tb); + nlrule->action = frh->action; + nlrule->flags = frh->flags; + nlrule->table = frh_get_table(frh, tb); if (tb[FRA_SUPPRESS_PREFIXLEN]) - rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]); + nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]); else - rule->suppress_prefixlen = -1; + nlrule->suppress_prefixlen = -1; if (tb[FRA_SUPPRESS_IFGROUP]) - rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]); + nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]); else - rule->suppress_ifgroup = -1; + nlrule->suppress_ifgroup = -1; if (tb[FRA_GOTO]) { - if (rule->action != FR_ACT_GOTO) + if (nlrule->action != FR_ACT_GOTO) { + NL_SET_ERR_MSG(extack, "Unexpected goto"); goto errout_free; + } - rule->target = nla_get_u32(tb[FRA_GOTO]); + nlrule->target = nla_get_u32(tb[FRA_GOTO]); /* Backward jumps are prohibited to avoid endless loops */ - if (rule->target <= rule->pref) + if (nlrule->target <= nlrule->pref) { + NL_SET_ERR_MSG(extack, "Backward goto not supported"); goto errout_free; - - list_for_each_entry(r, &ops->rules_list, list) { - if (r->pref == rule->target) { - RCU_INIT_POINTER(rule->ctarget, r); - break; - } } - - if (rcu_dereference_protected(rule->ctarget, 1) == NULL) - unresolved = 1; - } else if (rule->action == FR_ACT_GOTO) + } else if (nlrule->action == FR_ACT_GOTO) { + NL_SET_ERR_MSG(extack, "Missing goto target for action goto"); goto errout_free; + } - if (rule->l3mdev && rule->table) + if (nlrule->l3mdev && nlrule->table) { + NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive"); goto errout_free; + } if (tb[FRA_UID_RANGE]) { if (current_user_ns() != net->user_ns) { err = -EPERM; + NL_SET_ERR_MSG(extack, "No permission to set uid"); goto errout_free; } - rule->uid_range = 
nla_get_kuid_range(tb); + nlrule->uid_range = nla_get_kuid_range(tb); - if (!uid_range_set(&rule->uid_range) || - !uid_lte(rule->uid_range.start, rule->uid_range.end)) + if (!uid_range_set(&nlrule->uid_range) || + !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) { + NL_SET_ERR_MSG(extack, "Invalid uid range"); goto errout_free; + } } else { - rule->uid_range = fib_kuid_range_unset; + nlrule->uid_range = fib_kuid_range_unset; } if (tb[FRA_IP_PROTO]) - rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]); + nlrule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]); if (tb[FRA_SPORT_RANGE]) { err = nla_get_port_range(tb[FRA_SPORT_RANGE], - &rule->sport_range); - if (err) + &nlrule->sport_range); + if (err) { + NL_SET_ERR_MSG(extack, "Invalid sport range"); goto errout_free; + } } if (tb[FRA_DPORT_RANGE]) { err = nla_get_port_range(tb[FRA_DPORT_RANGE], - &rule->dport_range); - if (err) + &nlrule->dport_range); + if (err) { + NL_SET_ERR_MSG(extack, "Invalid dport range"); goto errout_free; + } } + *rule = nlrule; + + return 0; + +errout_free: + kfree(nlrule); +errout: + return err; +} + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule = NULL, *r, *last = NULL; + struct nlattr *tb[FRA_MAX + 1]; + int err = -EINVAL, unresolved = 0; + bool user_priority = false; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { + NL_SET_ERR_MSG(extack, "Invalid msg length"); + goto errout; + } + + ops = lookup_rules_ops(net, frh->family); + if (!ops) { + err = -EAFNOSUPPORT; + NL_SET_ERR_MSG(extack, "Rule family not supported"); + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Error parsing msg"); + goto errout; + } + + err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority); + if (err) + goto errout; + if ((nlh->nlmsg_flags & NLM_F_EXCL) && - rule_exists(ops, frh, tb, rule)) { + rule_find(ops, frh, tb, rule, user_priority)) { err = -EEXIST; goto errout_free; } - err = ops->configure(rule, skb, frh, tb); + err = ops->configure(rule, skb, frh, tb, extack); if (err < 0) goto errout_free; @@ -637,6 +694,16 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout_free; list_for_each_entry(r, &ops->rules_list, list) { + if (r->pref == rule->target) { + RCU_INIT_POINTER(rule->ctarget, r); + break; + } + } + + if (rcu_dereference_protected(rule->ctarget, 1) == NULL) + unresolved = 1; + + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref > rule->pref) break; last = r; @@ -690,171 +757,97 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); - struct fib_rule_port_range sprange = {0, 0}; - struct fib_rule_port_range dprange = {0, 0}; struct fib_rules_ops *ops = NULL; - struct fib_rule *rule, *r; + struct fib_rule *rule = NULL, *r, *nlrule = NULL; struct nlattr *tb[FRA_MAX+1]; - struct fib_kuid_range range; int err = -EINVAL; + bool user_priority = false; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { + NL_SET_ERR_MSG(extack, "Invalid msg length"); goto errout; + } ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = -EAFNOSUPPORT; + NL_SET_ERR_MSG(extack, "Rule family not supported"); goto errout; } err = nlmsg_parse(nlh, sizeof(*frh), 
tb, FRA_MAX, ops->policy, extack); - if (err < 0) + if (err < 0) { + NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; + } - err = validate_rulemsg(frh, tb, ops); - if (err < 0) + err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority); + if (err) goto errout; - if (tb[FRA_UID_RANGE]) { - range = nla_get_kuid_range(tb); - if (!uid_range_set(&range)) { - err = -EINVAL; - goto errout; - } - } else { - range = fib_kuid_range_unset; + rule = rule_find(ops, frh, tb, nlrule, user_priority); + if (!rule) { + err = -ENOENT; + goto errout; } - if (tb[FRA_SPORT_RANGE]) { - err = nla_get_port_range(tb[FRA_SPORT_RANGE], - &sprange); - if (err) - goto errout; + if (rule->flags & FIB_RULE_PERMANENT) { + err = -EPERM; + goto errout; } - if (tb[FRA_DPORT_RANGE]) { - err = nla_get_port_range(tb[FRA_DPORT_RANGE], - &dprange); + if (ops->delete) { + err = ops->delete(rule); if (err) goto errout; } - list_for_each_entry(rule, &ops->rules_list, list) { - if (tb[FRA_PROTOCOL] && - (rule->proto != nla_get_u8(tb[FRA_PROTOCOL]))) - continue; - - if (frh->action && (frh->action != rule->action)) - continue; - - if (frh_get_table(frh, tb) && - (frh_get_table(frh, tb) != rule->table)) - continue; - - if (tb[FRA_PRIORITY] && - (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) - continue; - - if (tb[FRA_IIFNAME] && - nla_strcmp(tb[FRA_IIFNAME], rule->iifname)) - continue; - - if (tb[FRA_OIFNAME] && - nla_strcmp(tb[FRA_OIFNAME], rule->oifname)) - continue; - - if (tb[FRA_FWMARK] && - (rule->mark != nla_get_u32(tb[FRA_FWMARK]))) - continue; - - if (tb[FRA_FWMASK] && - (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) - continue; - - if (tb[FRA_TUN_ID] && - (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID]))) - continue; - - if (tb[FRA_L3MDEV] && - (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) - continue; - - if (uid_range_set(&range) && - (!uid_eq(rule->uid_range.start, range.start) || - !uid_eq(rule->uid_range.end, range.end))) - continue; - - if (tb[FRA_IP_PROTO] && - (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO]))) - continue; - - if (fib_rule_port_range_set(&sprange) && - !fib_rule_port_range_compare(&rule->sport_range, &sprange)) - continue; - - if (fib_rule_port_range_set(&dprange) && - !fib_rule_port_range_compare(&rule->dport_range, &dprange)) - continue; - - if (!ops->compare(rule, frh, tb)) - continue; - - if (rule->flags & FIB_RULE_PERMANENT) { - err = -EPERM; - goto errout; - } - - if (ops->delete) { - err = ops->delete(rule); - if (err) - goto errout; - } + if (rule->tun_id) + ip_tunnel_unneed_metadata(); - if (rule->tun_id) - ip_tunnel_unneed_metadata(); + list_del_rcu(&rule->list); - list_del_rcu(&rule->list); - - if (rule->action == FR_ACT_GOTO) { - ops->nr_goto_rules--; - if (rtnl_dereference(rule->ctarget) == NULL) - ops->unresolved_rules--; - } + if (rule->action == FR_ACT_GOTO) { + ops->nr_goto_rules--; + if (rtnl_dereference(rule->ctarget) == NULL) + ops->unresolved_rules--; + } - /* - * Check if this rule is a target to any of them. If so, - * adjust to the next one with the same preference or - * disable them. As this operation is eventually very - * expensive, it is only performed if goto rules, except - * current if it is goto rule, have actually been added. 
- */ - if (ops->nr_goto_rules > 0) { - struct fib_rule *n; - - n = list_next_entry(rule, list); - if (&n->list == &ops->rules_list || n->pref != rule->pref) - n = NULL; - list_for_each_entry(r, &ops->rules_list, list) { - if (rtnl_dereference(r->ctarget) != rule) - continue; - rcu_assign_pointer(r->ctarget, n); - if (!n) - ops->unresolved_rules++; - } + /* + * Check if this rule is a target to any of them. If so, + * adjust to the next one with the same preference or + * disable them. As this operation is eventually very + * expensive, it is only performed if goto rules, except + * current if it is goto rule, have actually been added. + */ + if (ops->nr_goto_rules > 0) { + struct fib_rule *n; + + n = list_next_entry(rule, list); + if (&n->list == &ops->rules_list || n->pref != rule->pref) + n = NULL; + list_for_each_entry(r, &ops->rules_list, list) { + if (rtnl_dereference(r->ctarget) != rule) + continue; + rcu_assign_pointer(r->ctarget, n); + if (!n) + ops->unresolved_rules++; } - - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, - NULL); - notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).portid); - fib_rule_put(rule); - flush_route_cache(ops); - rules_ops_put(ops); - return 0; } - err = -ENOENT; + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, + NULL); + notify_rule_change(RTM_DELRULE, rule, ops, nlh, + NETLINK_CB(skb).portid); + fib_rule_put(rule); + flush_route_cache(ops); + rules_ops_put(ops); + kfree(nlrule); + return 0; + errout: + if (nlrule) + kfree(nlrule); rules_ops_put(ops); return err; } diff --git a/net/core/filter.c b/net/core/filter.c index e77c30ca491d..d3781daa26ab 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -57,6 +57,7 @@ #include <net/sock_reuseport.h> #include <net/busy_poll.h> #include <net/tcp.h> +#include <net/xfrm.h> #include <linux/bpf_trace.h> /** @@ -2692,8 +2693,9 @@ static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) { + void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); unsigned long metalen = xdp_get_metalen(xdp); - void *data_start = xdp->data_hard_start + metalen; + void *data_start = xdp_frame_end + metalen; void *data = xdp->data + offset; if (unlikely(data < data_start || @@ -2717,14 +2719,39 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) +{ + void *data_end = xdp->data_end + offset; + + /* only shrinking is allowed for now. 
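+	 *
+	 * A BPF program is expected to pass a negative offset, roughly
+	 * (sketch; "trim" is a hypothetical local variable):
+	 *
+	 *	if (bpf_xdp_adjust_tail(xdp, -(int)trim))
+	 *		return XDP_ABORTED;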
*/ + if (unlikely(offset >= 0)) + return -EINVAL; + + if (unlikely(data_end < xdp->data + ETH_HLEN)) + return -EINVAL; + + xdp->data_end = data_end; + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = { + .func = bpf_xdp_adjust_tail, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) { + void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); void *meta = xdp->data_meta + offset; unsigned long metalen = xdp->data - meta; if (xdp_data_meta_unsupported(xdp)) return -ENOTSUPP; - if (unlikely(meta < xdp->data_hard_start || + if (unlikely(meta < xdp_frame_end || meta > xdp->data)) return -EINVAL; if (unlikely((metalen & (sizeof(__u32) - 1)) || @@ -2749,13 +2776,18 @@ static int __bpf_tx_xdp(struct net_device *dev, struct xdp_buff *xdp, u32 index) { + struct xdp_frame *xdpf; int err; if (!dev->netdev_ops->ndo_xdp_xmit) { return -EOPNOTSUPP; } - err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) + return -EOVERFLOW; + + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf); if (err) return err; dev->netdev_ops->ndo_xdp_flush(dev); @@ -2771,11 +2803,19 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, if (map->map_type == BPF_MAP_TYPE_DEVMAP) { struct net_device *dev = fwd; + struct xdp_frame *xdpf; if (!dev->netdev_ops->ndo_xdp_xmit) return -EOPNOTSUPP; - err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) + return -EOVERFLOW; + + /* TODO: move to inside map code instead, for bulk support + * err = dev_map_enqueue(dev, xdp); + */ + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf); if (err) return err; __dev_map_insert_ctx(map, index); @@ -3053,7 +3093,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_l4_csum_replace || func == bpf_xdp_adjust_head || func == bpf_xdp_adjust_meta || - func == bpf_msg_pull_data) + func == bpf_msg_pull_data || + func == bpf_xdp_adjust_tail) return true; return false; @@ -3704,6 +3745,49 @@ static const struct bpf_func_proto bpf_bind_proto = { .arg3_type = ARG_CONST_SIZE, }; +#ifdef CONFIG_XFRM +BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index, + struct bpf_xfrm_state *, to, u32, size, u64, flags) +{ + const struct sec_path *sp = skb_sec_path(skb); + const struct xfrm_state *x; + + if (!sp || unlikely(index >= sp->len || flags)) + goto err_clear; + + x = sp->xvec[index]; + + if (unlikely(size != sizeof(struct bpf_xfrm_state))) + goto err_clear; + + to->reqid = x->props.reqid; + to->spi = x->id.spi; + to->family = x->props.family; + if (to->family == AF_INET6) { + memcpy(to->remote_ipv6, x->props.saddr.a6, + sizeof(to->remote_ipv6)); + } else { + to->remote_ipv4 = x->props.saddr.a4; + } + + return 0; +err_clear: + memset(to, 0, size); + return -EINVAL; +} + +static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { + .func = bpf_skb_get_xfrm_state, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, + .arg5_type = ARG_ANYTHING, +}; +#endif + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -3845,6 +3929,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: return 
&bpf_get_socket_uid_proto; +#ifdef CONFIG_XFRM + case BPF_FUNC_skb_get_xfrm_state: + return &bpf_skb_get_xfrm_state_proto; +#endif default: return bpf_base_func_proto(func_id); } @@ -3868,6 +3956,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_redirect_proto; case BPF_FUNC_redirect_map: return &bpf_xdp_redirect_map_proto; + case BPF_FUNC_xdp_adjust_tail: + return &bpf_xdp_adjust_tail_proto; default: return bpf_base_func_proto(func_id); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ce519861be59..5afae29367c1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -820,7 +820,8 @@ static void neigh_periodic_work(struct work_struct *work) write_lock(&n->lock); state = n->nud_state; - if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { + if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) || + (n->flags & NTF_EXT_LEARNED)) { write_unlock(&n->lock); goto next_elt; } @@ -1136,6 +1137,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (neigh->dead) goto out; + neigh_update_ext_learned(neigh, flags, ¬ify); + if (!(new & NUD_VALID)) { neigh_del_timer(neigh); if (old & NUD_CONNECTED) @@ -1781,6 +1784,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, flags &= ~NEIGH_UPDATE_F_OVERRIDE; } + if (ndm->ndm_flags & NTF_EXT_LEARNED) + flags |= NEIGH_UPDATE_F_EXT_LEARNED; + if (ndm->ndm_flags & NTF_USE) { neigh_event_send(neigh, NULL); err = 0; diff --git a/net/core/page_pool.c b/net/core/page_pool.c new file mode 100644 index 000000000000..68bf07206744 --- /dev/null +++ b/net/core/page_pool.c @@ -0,0 +1,317 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * page_pool.c + * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> + * Copyright (C) 2016 Red Hat, Inc. + */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include <net/page_pool.h> +#include <linux/dma-direction.h> +#include <linux/dma-mapping.h> +#include <linux/page-flags.h> +#include <linux/mm.h> /* for __put_page() */ + +static int page_pool_init(struct page_pool *pool, + const struct page_pool_params *params) +{ + unsigned int ring_qsize = 1024; /* Default */ + + memcpy(&pool->p, params, sizeof(pool->p)); + + /* Validate only known flags were used */ + if (pool->p.flags & ~(PP_FLAG_ALL)) + return -EINVAL; + + if (pool->p.pool_size) + ring_qsize = pool->p.pool_size; + + /* Sanity limit mem that can be pinned down */ + if (ring_qsize > 32768) + return -E2BIG; + + /* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. + * DMA_BIDIRECTIONAL is for allowing page used for DMA sending, + * which is the XDP_TX use-case. 
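+	 *
+	 * E.g. a driver that may transmit its RX pages via XDP_TX could
+	 * set up its pool like this (sketch; the field values are
+	 * illustrative placeholders, not mandated by this patch):
+	 *
+	 *	struct page_pool_params pp_params = {
+	 *		.order		= 0,
+	 *		.flags		= PP_FLAG_DMA_MAP,
+	 *		.pool_size	= 256,
+	 *		.nid		= NUMA_NO_NODE,
+	 *		.dev		= &pdev->dev,
+	 *		.dma_dir	= DMA_BIDIRECTIONAL,
+	 *	};
+	 *	struct page_pool *pool = page_pool_create(&pp_params);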
+ */
+	if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
+	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
+		return -EINVAL;
+
+	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+	struct page_pool *pool;
+	int err = 0;
+
+	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+
+	err = page_pool_init(pool, params);
+	if (err < 0) {
+		pr_warn("%s() gave up with errno %d\n", __func__, err);
+		kfree(pool);
+		return ERR_PTR(err);
+	}
+	return pool;
+}
+EXPORT_SYMBOL(page_pool_create);
+
+/* fast path */
+static struct page *__page_pool_get_cached(struct page_pool *pool)
+{
+	struct ptr_ring *r = &pool->ring;
+	struct page *page;
+
+	/* Quicker fallback, avoid locks when ring is empty */
+	if (__ptr_ring_empty(r))
+		return NULL;
+
+	/* Test for safe-context, caller should provide this guarantee */
+	if (likely(in_serving_softirq())) {
+		if (likely(pool->alloc.count)) {
+			/* Fast-path */
+			page = pool->alloc.cache[--pool->alloc.count];
+			return page;
+		}
+		/* Slower-path: Alloc array empty, time to refill
+		 *
+		 * Open-coded bulk ptr_ring consumer.
+		 *
+		 * Discussion: the ring consumer lock is not really
+		 * needed due to the softirq/NAPI protection, but we
+		 * later need the ability to reclaim pages on the
+		 * ring. Thus, keeping the locks.
+		 */
+		spin_lock(&r->consumer_lock);
+		while ((page = __ptr_ring_consume(r))) {
+			if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
+				break;
+			pool->alloc.cache[pool->alloc.count++] = page;
+		}
+		spin_unlock(&r->consumer_lock);
+		return page;
+	}
+
+	/* Slow-path: Get page from locked ring queue */
+	page = ptr_ring_consume(&pool->ring);
+	return page;
+}
+
+/* slow path */
+noinline
+static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
+						 gfp_t _gfp)
+{
+	struct page *page;
+	gfp_t gfp = _gfp;
+	dma_addr_t dma;
+
+	/* We could always set __GFP_COMP, and avoid this branch, as
+	 * prep_new_page() can handle order-0 with __GFP_COMP.
+	 */
+	if (pool->p.order)
+		gfp |= __GFP_COMP;
+
+	/* FUTURE development:
+	 *
+	 * Current slow-path essentially falls back to single page
+	 * allocations, which doesn't improve performance. This code
+	 * needs bulk allocation support from the page allocator code.
+	 */
+
+	/* Cache was empty, do real allocation */
+	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+	if (!page)
+		return NULL;
+
+	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+		goto skip_dma_map;
+
+	/* Setup DMA mapping: use page->private for the DMA address.
+	 * This mapping is kept for the lifetime of the page, until it
+	 * leaves the pool.
+	 */
+	dma = dma_map_page(pool->p.dev, page, 0,
+			   (PAGE_SIZE << pool->p.order),
+			   pool->p.dma_dir);
+	if (dma_mapping_error(pool->p.dev, dma)) {
+		put_page(page);
+		return NULL;
+	}
+	set_page_private(page, dma); /* page->private = dma; */
+
+skip_dma_map:
+	/* A page that was just alloc'ed should/must have refcnt 1. */
+	return page;
+}
+
+/* Use page_pool to replace alloc_pages() API calls, but with a
+ * synchronization guarantee on the allocation side.
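+ *
+ * A typical RX-ring refill loop then becomes (sketch, assuming a pool
+ * created as in the example above; GFP_ATOMIC because refill usually
+ * runs from softirq context):
+ *
+ *	page = page_pool_alloc_pages(pool, GFP_ATOMIC);
+ *	if (!page)
+ *		break;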
+ */
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
+{
+	struct page *page;
+
+	/* Fast-path: Get a page from cache */
+	page = __page_pool_get_cached(pool);
+	if (page)
+		return page;
+
+	/* Slow-path: cache empty, do real allocation */
+	page = __page_pool_alloc_pages_slow(pool, gfp);
+	return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_pages);
+
+/* Cleanup page_pool state from page */
+static void __page_pool_clean_page(struct page_pool *pool,
+				   struct page *page)
+{
+	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+		return;
+
+	/* DMA unmap */
+	dma_unmap_page(pool->p.dev, page_private(page),
+		       PAGE_SIZE << pool->p.order, pool->p.dma_dir);
+	set_page_private(page, 0);
+}
+
+/* Return a page to the page allocator, cleaning up our state */
+static void __page_pool_return_page(struct page_pool *pool, struct page *page)
+{
+	__page_pool_clean_page(pool, page);
+	put_page(page);
+	/* An optimization would be to call __free_pages(page, pool->p.order)
+	 * knowing page is not part of page-cache (thus avoiding a
+	 * __page_cache_release() call).
+	 */
+}
+
+static bool __page_pool_recycle_into_ring(struct page_pool *pool,
+					  struct page *page)
+{
+	int ret;
+	/* BH protection not needed if current is serving softirq */
+	if (in_serving_softirq())
+		ret = ptr_ring_produce(&pool->ring, page);
+	else
+		ret = ptr_ring_produce_bh(&pool->ring, page);
+
+	return (ret == 0) ? true : false;
+}
+
+/* Only allow direct recycling in special circumstances, into the
+ * alloc side cache. E.g. during RX-NAPI processing for the XDP_DROP
+ * use-case.
+ *
+ * Caller must provide appropriate safe context.
+ */
+static bool __page_pool_recycle_direct(struct page *page,
+				       struct page_pool *pool)
+{
+	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
+		return false;
+
+	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
+	pool->alloc.cache[pool->alloc.count++] = page;
+	return true;
+}
+
+void __page_pool_put_page(struct page_pool *pool,
+			  struct page *page, bool allow_direct)
+{
+	/* This allocator is optimized for the XDP mode that uses
+	 * one-frame-per-page, but it has fallbacks that act like the
+	 * regular page allocator APIs.
+	 *
+	 * refcnt == 1 means page_pool owns the page, and can recycle it.
+	 */
+	if (likely(page_ref_count(page) == 1)) {
+		/* Read barrier done in page_ref_count / READ_ONCE */
+
+		if (allow_direct && in_serving_softirq())
+			if (__page_pool_recycle_direct(page, pool))
+				return;
+
+		if (!__page_pool_recycle_into_ring(pool, page)) {
+			/* Cache full, fallback to free pages */
+			__page_pool_return_page(pool, page);
+		}
+		return;
+	}
+	/* Fallback/non-XDP mode: the API user has an elevated refcnt.
+	 *
+	 * Many drivers split up the page into fragments, and some
+	 * want to keep doing this to save memory and do refcnt based
+	 * recycling. Support this use case too, to ease drivers
+	 * switching between XDP/non-XDP.
+	 *
+	 * In case page_pool maintains the DMA mapping, the API user must
+	 * call page_pool_put_page once. In this elevated refcnt
+	 * case, the DMA is unmapped/released, as the driver is likely
+	 * doing refcnt based recycle tricks, meaning another process
+	 * will be invoking put_page.
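+	 *
+	 * Note that callers normally go through page_pool_put_page()
+	 * (as net/core/xdp.c does elsewhere in this series); direct
+	 * recycling into the alloc cache is reserved for safe
+	 * softirq/NAPI contexts such as the XDP_DROP case.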
+ */ + __page_pool_clean_page(pool, page); + put_page(page); +} +EXPORT_SYMBOL(__page_pool_put_page); + +static void __page_pool_empty_ring(struct page_pool *pool) +{ + struct page *page; + + /* Empty recycle ring */ + while ((page = ptr_ring_consume(&pool->ring))) { + /* Verify the refcnt invariant of cached pages */ + if (!(page_ref_count(page) == 1)) + pr_crit("%s() page_pool refcnt %d violation\n", + __func__, page_ref_count(page)); + + __page_pool_return_page(pool, page); + } +} + +static void __page_pool_destroy_rcu(struct rcu_head *rcu) +{ + struct page_pool *pool; + + pool = container_of(rcu, struct page_pool, rcu); + + WARN(pool->alloc.count, "API usage violation"); + + __page_pool_empty_ring(pool); + ptr_ring_cleanup(&pool->ring, NULL); + kfree(pool); +} + +/* Cleanup and release resources */ +void page_pool_destroy(struct page_pool *pool) +{ + struct page *page; + + /* Empty alloc cache, assume caller made sure this is + * no-longer in use, and page_pool_alloc_pages() cannot be + * call concurrently. + */ + while (pool->alloc.count) { + page = pool->alloc.cache[--pool->alloc.count]; + __page_pool_return_page(pool, page); + } + + /* No more consumers should exist, but producers could still + * be in-flight. + */ + __page_pool_empty_ring(pool); + + /* An xdp_mem_allocator can still ref page_pool pointer */ + call_rcu(&pool->rcu, __page_pool_destroy_rcu); +} +EXPORT_SYMBOL(page_pool_destroy); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 45936922d7e2..80802546c279 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -785,13 +785,15 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error) { struct rta_cacheinfo ci = { - .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse), - .rta_used = dst->__use, - .rta_clntref = atomic_read(&(dst->__refcnt)), .rta_error = error, .rta_id = id, }; + if (dst) { + ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse); + ci.rta_used = dst->__use; + ci.rta_clntref = atomic_read(&dst->__refcnt); + } if (expires) { unsigned long clock; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 345b51837ca8..c642304f178c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1305,7 +1305,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off) skb->inner_mac_header += off; } -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) +void skb_copy_header(struct sk_buff *new, const struct sk_buff *old) { __copy_skb_header(new, old); @@ -1313,6 +1313,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } +EXPORT_SYMBOL(skb_copy_header); static inline int skb_alloc_rx_flag(const struct sk_buff *skb) { @@ -1355,7 +1356,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); - copy_skb_header(n, skb); + skb_copy_header(n, skb); return n; } EXPORT_SYMBOL(skb_copy); @@ -1419,7 +1420,7 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, skb_clone_fraglist(n); } - copy_skb_header(n, skb); + skb_copy_header(n, skb); out: return n; } @@ -1599,7 +1600,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, skb->len + head_copy_len)); - copy_skb_header(n, skb); + skb_copy_header(n, skb); skb_headers_offset_update(n, 
newheadroom - oldheadroom); @@ -1839,6 +1840,20 @@ done: } EXPORT_SYMBOL(___pskb_trim); +/* Note : use pskb_trim_rcsum() instead of calling this directly + */ +int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + int delta = skb->len - len; + + skb->csum = csum_sub(skb->csum, + skb_checksum(skb, len, delta, 0)); + } + return __pskb_trim(skb, len); +} +EXPORT_SYMBOL(pskb_trim_rcsum_slow); + /** * __pskb_pull_tail - advance tail of skb header * @skb: buffer to reallocate @@ -4926,6 +4941,8 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) thlen = tcp_hdrlen(skb); } else if (unlikely(skb_is_gso_sctp(skb))) { thlen = sizeof(struct sctphdr); + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { + thlen = sizeof(struct udphdr); } /* UFO sets gso_size to the size of the fragmentation * payload, i.e. the size of the L4 (UDP) header is already diff --git a/net/core/sock.c b/net/core/sock.c index 6444525f610c..b2c3db169ca1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -905,7 +905,10 @@ set_rcvbuf: case SO_RCVLOWAT: if (val < 0) val = INT_MAX; - sk->sk_rcvlowat = val ? : 1; + if (sock->ops->set_rcvlowat) + ret = sock->ops->set_rcvlowat(sk, val); + else + sk->sk_rcvlowat = val ? : 1; break; case SO_RCVTIMEO: diff --git a/net/core/xdp.c b/net/core/xdp.c index 097a0f74e004..0c86b53a3a63 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -5,6 +5,10 @@ */ #include <linux/types.h> #include <linux/mm.h> +#include <linux/slab.h> +#include <linux/idr.h> +#include <linux/rhashtable.h> +#include <net/page_pool.h> #include <net/xdp.h> @@ -13,6 +17,104 @@ #define REG_STATE_UNREGISTERED 0x2 #define REG_STATE_UNUSED 0x3 +static DEFINE_IDA(mem_id_pool); +static DEFINE_MUTEX(mem_id_lock); +#define MEM_ID_MAX 0xFFFE +#define MEM_ID_MIN 1 +static int mem_id_next = MEM_ID_MIN; + +static bool mem_id_init; /* false */ +static struct rhashtable *mem_id_ht; + +struct xdp_mem_allocator { + struct xdp_mem_info mem; + union { + void *allocator; + struct page_pool *page_pool; + }; + struct rhash_head node; + struct rcu_head rcu; +}; + +static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) +{ + const u32 *k = data; + const u32 key = *k; + + BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id) + != sizeof(u32)); + + /* Use cyclic increasing ID as direct hash key, see rht_bucket_index */ + return key << RHT_HASH_RESERVED_SPACE; +} + +static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct xdp_mem_allocator *xa = ptr; + u32 mem_id = *(u32 *)arg->key; + + return xa->mem.id != mem_id; +} + +static const struct rhashtable_params mem_id_rht_params = { + .nelem_hint = 64, + .head_offset = offsetof(struct xdp_mem_allocator, node), + .key_offset = offsetof(struct xdp_mem_allocator, mem.id), + .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id), + .max_size = MEM_ID_MAX, + .min_size = 8, + .automatic_shrinking = true, + .hashfn = xdp_mem_id_hashfn, + .obj_cmpfn = xdp_mem_id_cmp, +}; + +static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu) +{ + struct xdp_mem_allocator *xa; + + xa = container_of(rcu, struct xdp_mem_allocator, rcu); + + /* Allow this ID to be reused */ + ida_simple_remove(&mem_id_pool, xa->mem.id); + + /* Notice, driver is expected to free the *allocator, + * e.g. page_pool, and MUST also use RCU free. 
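+	 *
+	 * page_pool_destroy() in net/core/page_pool.c follows this
+	 * rule: it defers the final kfree of the pool via call_rcu().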
+ */ + + /* Poison memory */ + xa->mem.id = 0xFFFF; + xa->mem.type = 0xF0F0; + xa->allocator = (void *)0xDEAD9001; + + kfree(xa); +} + +static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) +{ + struct xdp_mem_allocator *xa; + int id = xdp_rxq->mem.id; + int err; + + if (id == 0) + return; + + mutex_lock(&mem_id_lock); + + xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params); + if (!xa) { + mutex_unlock(&mem_id_lock); + return; + } + + err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params); + WARN_ON(err); + + call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); + + mutex_unlock(&mem_id_lock); +} + void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) { /* Simplify driver cleanup code paths, allow unreg "unused" */ @@ -21,8 +123,14 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG"); + __xdp_rxq_info_unreg_mem_model(xdp_rxq); + xdp_rxq->reg_state = REG_STATE_UNREGISTERED; xdp_rxq->dev = NULL; + + /* Reset mem info to defaults */ + xdp_rxq->mem.id = 0; + xdp_rxq->mem.type = 0; } EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg); @@ -71,3 +179,164 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq) return (xdp_rxq->reg_state == REG_STATE_REGISTERED); } EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg); + +static int __mem_id_init_hash_table(void) +{ + struct rhashtable *rht; + int ret; + + if (unlikely(mem_id_init)) + return 0; + + rht = kzalloc(sizeof(*rht), GFP_KERNEL); + if (!rht) + return -ENOMEM; + + ret = rhashtable_init(rht, &mem_id_rht_params); + if (ret < 0) { + kfree(rht); + return ret; + } + mem_id_ht = rht; + smp_mb(); /* mutex lock should provide enough pairing */ + mem_id_init = true; + + return 0; +} + +/* Allocate a cyclic ID that maps to allocator pointer. + * See: https://www.kernel.org/doc/html/latest/core-api/idr.html + * + * Caller must lock mem_id_lock. 
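+ *
+ * Simplified caller pattern (see xdp_rxq_info_reg_mem_model() below
+ * for the full version, which keeps the lock held across the
+ * rhashtable insert as well):
+ *
+ *	mutex_lock(&mem_id_lock);
+ *	id = __mem_id_cyclic_get(gfp);
+ *	mutex_unlock(&mem_id_lock);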
+ */ +static int __mem_id_cyclic_get(gfp_t gfp) +{ + int retries = 1; + int id; + +again: + id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp); + if (id < 0) { + if (id == -ENOSPC) { + /* Cyclic allocator, reset next id */ + if (retries--) { + mem_id_next = MEM_ID_MIN; + goto again; + } + } + return id; /* errno */ + } + mem_id_next = id + 1; + + return id; +} + +static bool __is_supported_mem_type(enum xdp_mem_type type) +{ + if (type == MEM_TYPE_PAGE_POOL) + return is_page_pool_compiled_in(); + + if (type >= MEM_TYPE_MAX) + return false; + + return true; +} + +int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + enum xdp_mem_type type, void *allocator) +{ + struct xdp_mem_allocator *xdp_alloc; + gfp_t gfp = GFP_KERNEL; + int id, errno, ret; + void *ptr; + + if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { + WARN(1, "Missing register, driver bug"); + return -EFAULT; + } + + if (!__is_supported_mem_type(type)) + return -EOPNOTSUPP; + + xdp_rxq->mem.type = type; + + if (!allocator) { + if (type == MEM_TYPE_PAGE_POOL) + return -EINVAL; /* Setup time check page_pool req */ + return 0; + } + + /* Delay init of rhashtable to save memory if feature isn't used */ + if (!mem_id_init) { + mutex_lock(&mem_id_lock); + ret = __mem_id_init_hash_table(); + mutex_unlock(&mem_id_lock); + if (ret < 0) { + WARN_ON(1); + return ret; + } + } + + xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); + if (!xdp_alloc) + return -ENOMEM; + + mutex_lock(&mem_id_lock); + id = __mem_id_cyclic_get(gfp); + if (id < 0) { + errno = id; + goto err; + } + xdp_rxq->mem.id = id; + xdp_alloc->mem = xdp_rxq->mem; + xdp_alloc->allocator = allocator; + + /* Insert allocator into ID lookup table */ + ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); + if (IS_ERR(ptr)) { + errno = PTR_ERR(ptr); + goto err; + } + + mutex_unlock(&mem_id_lock); + + return 0; +err: + mutex_unlock(&mem_id_lock); + kfree(xdp_alloc); + return errno; +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); + +void xdp_return_frame(struct xdp_frame *xdpf) +{ + struct xdp_mem_info *mem = &xdpf->mem; + struct xdp_mem_allocator *xa; + void *data = xdpf->data; + struct page *page; + + switch (mem->type) { + case MEM_TYPE_PAGE_POOL: + rcu_read_lock(); + /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ + xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); + page = virt_to_head_page(data); + if (xa) + page_pool_put_page(xa->page_pool, page); + else + put_page(page); + rcu_read_unlock(); + break; + case MEM_TYPE_PAGE_SHARED: + page_frag_free(data); + break; + case MEM_TYPE_PAGE_ORDER0: + page = virt_to_page(data); /* Assumes order0 page*/ + put_page(page); + break; + default: + /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ + break; + } +} +EXPORT_SYMBOL_GPL(xdp_return_frame); diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index c795c3f509c9..72236695db3d 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -121,13 +121,16 @@ static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, - struct nlattr **tb) + struct nlattr **tb, + struct netlink_ext_ack *extack) { int err = -EINVAL; struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - if (frh->tos) + if (frh->tos) { + NL_SET_ERR_MSG(extack, "Invalid tos value"); goto errout; + } if (rule->table == RT_TABLE_UNSPEC) { if (rule->action == FR_ACT_TO_TBL) { diff --git a/net/dsa/master.c b/net/dsa/master.c 
index 90e6df0351eb..c90ee3227dea 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -22,7 +22,7 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, int port = cpu_dp->index; int count = 0; - if (ops && ops->get_sset_count && ops->get_ethtool_stats) { + if (ops->get_sset_count && ops->get_ethtool_stats) { count = ops->get_sset_count(dev, ETH_SS_STATS); ops->get_ethtool_stats(dev, stats, data); } @@ -31,6 +31,32 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, ds->ops->get_ethtool_stats(ds, port, data + count); } +static void dsa_master_get_ethtool_phy_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; + int count = 0; + + if (dev->phydev && !ops->get_ethtool_phy_stats) { + count = phy_ethtool_get_sset_count(dev->phydev); + if (count >= 0) + phy_ethtool_get_stats(dev->phydev, stats, data); + } else if (ops->get_sset_count && ops->get_ethtool_phy_stats) { + count = ops->get_sset_count(dev, ETH_SS_PHY_STATS); + ops->get_ethtool_phy_stats(dev, stats, data); + } + + if (count < 0) + count = 0; + + if (ds->ops->get_ethtool_phy_stats) + ds->ops->get_ethtool_phy_stats(ds, port, data + count); +} + static int dsa_master_get_sset_count(struct net_device *dev, int sset) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -38,11 +64,17 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset) struct dsa_switch *ds = cpu_dp->ds; int count = 0; - if (ops && ops->get_sset_count) - count += ops->get_sset_count(dev, sset); + if (sset == ETH_SS_PHY_STATS && dev->phydev && + !ops->get_ethtool_phy_stats) + count = phy_ethtool_get_sset_count(dev->phydev); + else if (ops->get_sset_count) + count = ops->get_sset_count(dev, sset); + + if (count < 0) + count = 0; - if (sset == ETH_SS_STATS && ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds, cpu_dp->index); + if (ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds, cpu_dp->index, sset); return count; } @@ -64,19 +96,28 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; - if (ops && ops->get_sset_count && ops->get_strings) { - mcount = ops->get_sset_count(dev, ETH_SS_STATS); + if (stringset == ETH_SS_PHY_STATS && dev->phydev && + !ops->get_ethtool_phy_stats) { + mcount = phy_ethtool_get_sset_count(dev->phydev); + if (mcount < 0) + mcount = 0; + else + phy_ethtool_get_strings(dev->phydev, data); + } else if (ops->get_sset_count && ops->get_strings) { + mcount = ops->get_sset_count(dev, stringset); + if (mcount < 0) + mcount = 0; ops->get_strings(dev, stringset, data); } - if (stringset == ETH_SS_STATS && ds->ops->get_strings) { + if (ds->ops->get_strings) { ndata = data + mcount * len; /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle * the output after to prepend our CPU port prefix we * constructed earlier */ - ds->ops->get_strings(ds, port, ndata); - count = ds->ops->get_sset_count(ds, port); + ds->ops->get_strings(ds, port, stringset, ndata); + count = ds->ops->get_sset_count(ds, port, stringset); for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); @@ -102,6 +143,7 @@ static int dsa_master_ethtool_setup(struct net_device *dev) ops->get_sset_count = dsa_master_get_sset_count; ops->get_ethtool_stats = 
dsa_master_get_ethtool_stats; ops->get_strings = dsa_master_get_strings; + ops->get_ethtool_phy_stats = dsa_master_get_ethtool_phy_stats; dev->ethtool_ops = ops; diff --git a/net/dsa/port.c b/net/dsa/port.c index 7acc1169d75e..2413beb995be 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -273,25 +273,38 @@ int dsa_port_vlan_del(struct dsa_port *dp, return 0; } -static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable) +static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) { - struct device_node *port_dn = dp->dn; struct device_node *phy_dn; - struct dsa_switch *ds = dp->ds; struct phy_device *phydev; - int port = dp->index; - int err = 0; - phy_dn = of_parse_phandle(port_dn, "phy-handle", 0); + phy_dn = of_parse_phandle(dp->dn, "phy-handle", 0); if (!phy_dn) - return 0; + return NULL; phydev = of_phy_find_device(phy_dn); if (!phydev) { - err = -EPROBE_DEFER; - goto err_put_of; + of_node_put(phy_dn); + return ERR_PTR(-EPROBE_DEFER); } + return phydev; +} + +static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable) +{ + struct dsa_switch *ds = dp->ds; + struct phy_device *phydev; + int port = dp->index; + int err = 0; + + phydev = dsa_port_get_phy_device(dp); + if (!phydev) + return 0; + + if (IS_ERR(phydev)) + return PTR_ERR(phydev); + if (enable) { err = genphy_config_init(phydev); if (err < 0) @@ -317,8 +330,6 @@ static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable) err_put_dev: put_device(&phydev->mdio.dev); -err_put_of: - of_node_put(phy_dn); return err; } @@ -372,3 +383,60 @@ void dsa_port_link_unregister_of(struct dsa_port *dp) else dsa_port_setup_phy_of(dp, false); } + +int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data) +{ + struct phy_device *phydev; + int ret = -EOPNOTSUPP; + + if (of_phy_is_fixed_link(dp->dn)) + return ret; + + phydev = dsa_port_get_phy_device(dp); + if (IS_ERR_OR_NULL(phydev)) + return ret; + + ret = phy_ethtool_get_strings(phydev, data); + put_device(&phydev->mdio.dev); + + return ret; +} +EXPORT_SYMBOL_GPL(dsa_port_get_phy_strings); + +int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data) +{ + struct phy_device *phydev; + int ret = -EOPNOTSUPP; + + if (of_phy_is_fixed_link(dp->dn)) + return ret; + + phydev = dsa_port_get_phy_device(dp); + if (IS_ERR_OR_NULL(phydev)) + return ret; + + ret = phy_ethtool_get_stats(phydev, NULL, data); + put_device(&phydev->mdio.dev); + + return ret; +} +EXPORT_SYMBOL_GPL(dsa_port_get_ethtool_phy_stats); + +int dsa_port_get_phy_sset_count(struct dsa_port *dp) +{ + struct phy_device *phydev; + int ret = -EOPNOTSUPP; + + if (of_phy_is_fixed_link(dp->dn)) + return ret; + + phydev = dsa_port_get_phy_device(dp); + if (IS_ERR_OR_NULL(phydev)) + return ret; + + ret = phy_ethtool_get_sset_count(phydev); + put_device(&phydev->mdio.dev); + + return ret; +} +EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 18561af7a8f1..c287f1ef964c 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -560,7 +560,8 @@ static void dsa_slave_get_strings(struct net_device *dev, strncpy(data + 2 * len, "rx_packets", len); strncpy(data + 3 * len, "rx_bytes", len); if (ds->ops->get_strings) - ds->ops->get_strings(ds, dp->index, data + 4 * len); + ds->ops->get_strings(ds, dp->index, stringset, + data + 4 * len); } } @@ -605,7 +606,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) count = 4; if (ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds, dp->index); + count += ds->ops->get_sset_count(ds, 
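/*
 * Userspace sketch, for illustration only: the ETH_SS_PHY_STATS plumbing
 * added above is consumed through the standard ethtool ioctls. Error
 * handling and allocation checks are trimmed; "eth0" is a placeholder.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct { struct ethtool_sset_info hdr; __u32 buf[1]; } sset_info = {
		.hdr = { .cmd = ETHTOOL_GSSET_INFO,
			 .sset_mask = 1ULL << ETH_SS_PHY_STATS },
	};
	struct ethtool_gstrings *strings;
	struct ethtool_stats *stats;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	__u32 i, n;

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, "eth0");

	ifr.ifr_data = (void *)&sset_info;
	if (ioctl(fd, SIOCETHTOOL, &ifr) || !sset_info.hdr.sset_mask)
		return 1;	/* no PHY statistics string set */
	n = sset_info.buf[0];

	strings = calloc(1, sizeof(*strings) + n * ETH_GSTRING_LEN);
	strings->cmd = ETHTOOL_GSTRINGS;
	strings->string_set = ETH_SS_PHY_STATS;
	strings->len = n;
	ifr.ifr_data = (void *)strings;
	ioctl(fd, SIOCETHTOOL, &ifr);

	stats = calloc(1, sizeof(*stats) + n * sizeof(__u64));
	stats->cmd = ETHTOOL_GPHYSTATS;
	stats->n_stats = n;
	ifr.ifr_data = (void *)stats;
	ioctl(fd, SIOCETHTOOL, &ifr);

	for (i = 0; i < n; i++)
		printf("%.32s: %llu\n",
		       strings->data + i * ETH_GSTRING_LEN,
		       (unsigned long long)stats->data[i]);
	return 0;
}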
dp->index, sset); return count; } @@ -1440,6 +1441,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_fdb_info *fdb_info = ptr; struct dsa_switchdev_event_work *switchdev_work; if (!dsa_slave_dev_check(dev)) @@ -1457,8 +1459,10 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, switch (event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */ case SWITCHDEV_FDB_DEL_TO_DEVICE: + if (!fdb_info->added_by_user) + break; if (dsa_slave_switchdev_fdb_work_init(switchdev_work, - ptr)) + fdb_info)) goto err_fdb_work_init; dev_hold(dev); break; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index a07b7dd06def..b379520f9133 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,7 +13,8 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ - inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o + inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ + metrics.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eaed0367e669..b403499fdabe 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -994,7 +994,9 @@ const struct proto_ops inet_stream_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, - .mmap = sock_no_mmap, +#ifdef CONFIG_MMU + .mmap = tcp_mmap, +#endif .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, @@ -1006,6 +1008,7 @@ const struct proto_ops inet_stream_ops = { .compat_getsockopt = compat_sock_common_getsockopt, .compat_ioctl = inet_compat_ioctl, #endif + .set_rcvlowat = tcp_set_rcvlowat, }; EXPORT_SYMBOL(inet_stream_ops); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 737d11bc8838..f8eb78d042a4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -213,14 +213,17 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, - struct nlattr **tb) + struct nlattr **tb, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; - if (frh->tos & ~IPTOS_TOS_MASK) + if (frh->tos & ~IPTOS_TOS_MASK) { + NL_SET_ERR_MSG(extack, "Invalid tos"); goto errout; + } /* split local/main if they are not already split */ err = fib_unmerge(net); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c27122f01b87..6608db23f54b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1019,47 +1019,8 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) static int fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) { - bool ecn_ca = false; - struct nlattr *nla; - int remaining; - - if (!cfg->fc_mx) - return 0; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - u32 val; - - if (!type) - continue; - if (type > RTAX_MAX) - return -EINVAL; - - if (type == RTAX_CC_ALGO) { - char tmp[TCP_CA_NAME_MAX]; - - nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); - if (val == TCP_CA_UNSPEC) - return -EINVAL; - } else { - val = 
nla_get_u32(nla); - } - if (type == RTAX_ADVMSS && val > 65535 - 40) - val = 65535 - 40; - if (type == RTAX_MTU && val > 65535 - 15) - val = 65535 - 15; - if (type == RTAX_HOPLIMIT && val > 255) - val = 255; - if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) - return -EINVAL; - fi->fib_metrics->metrics[type - 1] = val; - } - - if (ecn_ca) - fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; - - return 0; + return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len, + fi->fib_metrics->metrics); } struct fib_info *fib_create_info(struct fib_config *cfg, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 9c169bb2444d..dfe5b22f6ed4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -578,6 +578,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, int tunnel_hlen; int version; __be16 df; + int nhoff; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@ -605,6 +606,11 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, truncate = true; } + nhoff = skb_network_header(skb) - skb_mac_header(skb); + if (skb->protocol == htons(ETH_P_IP) && + (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) + truncate = true; + if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), ntohl(md->u.index), truncate, true); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 83c73bab2c3d..f2338e40c37d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -878,11 +878,14 @@ static int __ip_append_data(struct sock *sk, struct rtable *rt = (struct rtable *)cork->dst; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; + bool paged; skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; - mtu = cork->fragsize; + mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; + paged = !!cork->gso_size; + if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = sk->sk_tskey++; @@ -906,7 +909,7 @@ static int __ip_append_data(struct sock *sk, if (transhdrlen && length + fragheaderlen <= mtu && rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && - !(flags & MSG_MORE) && + (!(flags & MSG_MORE) || cork->gso_size) && !exthdrlen) csummode = CHECKSUM_PARTIAL; @@ -933,6 +936,7 @@ static int __ip_append_data(struct sock *sk, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; + unsigned int pagedlen = 0; struct sk_buff *skb_prev; alloc_new_skb: skb_prev = skb; @@ -953,8 +957,12 @@ alloc_new_skb: if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else + else if (!paged) alloclen = fraglen; + else { + alloclen = min_t(int, fraglen, MAX_HEADER); + pagedlen = fraglen - alloclen; + } alloclen += exthdrlen; @@ -998,7 +1006,7 @@ alloc_new_skb: /* * Find where to start putting bytes. 
*/ - data = skb_put(skb, fraglen + exthdrlen); + data = skb_put(skb, fraglen + exthdrlen - pagedlen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); @@ -1014,7 +1022,7 @@ alloc_new_skb: pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; + copy = datalen - transhdrlen - fraggap - pagedlen; if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); @@ -1022,7 +1030,7 @@ alloc_new_skb: } offset += copy; - length -= datalen - fraggap; + length -= copy + transhdrlen; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; @@ -1135,6 +1143,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, *rtp = NULL; cork->fragsize = ip_sk_use_pmtu(sk) ? dst_mtu(&rt->dst) : rt->dst.dev->mtu; + + cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0; cork->dst = &rt->dst; cork->length = 0; cork->ttl = ipc->ttl; @@ -1214,7 +1224,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EOPNOTSUPP; hh_len = LL_RESERVED_SPACE(rt->dst.dev); - mtu = cork->fragsize; + mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; @@ -1470,9 +1480,8 @@ struct sk_buff *ip_make_skb(struct sock *sk, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, - unsigned int flags) + struct inet_cork *cork, unsigned int flags) { - struct inet_cork cork; struct sk_buff_head queue; int err; @@ -1481,22 +1490,22 @@ struct sk_buff *ip_make_skb(struct sock *sk, __skb_queue_head_init(&queue); - cork.flags = 0; - cork.addr = 0; - cork.opt = NULL; - err = ip_setup_cork(sk, &cork, ipc, rtp); + cork->flags = 0; + cork->addr = 0; + cork->opt = NULL; + err = ip_setup_cork(sk, cork, ipc, rtp); if (err) return ERR_PTR(err); - err = __ip_append_data(sk, fl4, &queue, &cork, + err = __ip_append_data(sk, fl4, &queue, cork, ¤t->task_frag, getfrag, from, length, transhdrlen, flags); if (err) { - __ip_flush_pending_frames(sk, &queue, &cork); + __ip_flush_pending_frames(sk, &queue, cork); return ERR_PTR(err); } - return __ip_make_skb(sk, fl4, &queue, &cork); + return __ip_make_skb(sk, fl4, &queue, cork); } /* diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 43f620feb1c4..d839d74853fc 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -28,6 +28,9 @@ * * Multiple Nameservers in /proc/net/pnp * -- Josef Siemes <jsiemes@web.de>, Aug 2002 + * + * NTP servers in /proc/net/ipconfig/ntp_servers + * -- Chris Novakovic <chris@chrisn.me.uk>, April 2018 */ #include <linux/types.h> @@ -93,6 +96,7 @@ #define CONF_TIMEOUT_MAX (HZ*30) /* Maximum allowed timeout */ #define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers - '3' from resolv.h */ +#define CONF_NTP_SERVERS_MAX 3 /* Maximum number of NTP servers */ #define NONE cpu_to_be32(INADDR_NONE) #define ANY cpu_to_be32(INADDR_ANY) @@ -152,12 +156,16 @@ static int ic_proto_used; /* Protocol used, if any */ #define ic_proto_used 0 #endif static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */ +static __be32 ic_ntp_servers[CONF_NTP_SERVERS_MAX]; /* NTP server IP addresses */ static u8 ic_domain[64]; /* DNS (not NIS) domain name */ /* * Private state. 
*/ +/* proc_dir_entry for /proc/net/ipconfig */ +static struct proc_dir_entry *ipconfig_dir; + /* Name of user-selected boot device */ static char user_dev_name[IFNAMSIZ] __initdata = { 0, }; @@ -576,6 +584,15 @@ static inline void __init ic_nameservers_predef(void) ic_nameservers[i] = NONE; } +/* Predefine NTP servers */ +static inline void __init ic_ntp_servers_predef(void) +{ + int i; + + for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) + ic_ntp_servers[i] = NONE; +} + /* * DHCP/BOOTP support. */ @@ -671,6 +688,7 @@ ic_dhcp_init_options(u8 *options, struct ic_device *d) 17, /* Boot path */ 26, /* MTU */ 40, /* NIS domain name */ + 42, /* NTP servers */ }; *e++ = 55; /* Parameter request list */ @@ -721,9 +739,11 @@ static void __init ic_bootp_init_ext(u8 *e) *e++ = 3; /* Default gateway request */ *e++ = 4; e += 4; - *e++ = 5; /* Name server request */ - *e++ = 8; - e += 8; +#if CONF_NAMESERVERS_MAX > 0 + *e++ = 6; /* (DNS) name server request */ + *e++ = 4 * CONF_NAMESERVERS_MAX; + e += 4 * CONF_NAMESERVERS_MAX; +#endif *e++ = 12; /* Host name request */ *e++ = 32; e += 32; @@ -748,7 +768,13 @@ static void __init ic_bootp_init_ext(u8 *e) */ static inline void __init ic_bootp_init(void) { + /* Re-initialise all name servers and NTP servers to NONE, in case any + * were set via the "ip=" or "nfsaddrs=" kernel command line parameters: + * any IP addresses specified there will already have been decoded but + * are no longer needed + */ ic_nameservers_predef(); + ic_ntp_servers_predef(); dev_add_pack(&bootp_packet_type); } @@ -912,6 +938,15 @@ static void __init ic_do_bootp_ext(u8 *ext) ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN); break; + case 42: /* NTP servers */ + servers = *ext / 4; + if (servers > CONF_NTP_SERVERS_MAX) + servers = CONF_NTP_SERVERS_MAX; + for (i = 0; i < servers; i++) { + if (ic_ntp_servers[i] == NONE) + memcpy(&ic_ntp_servers[i], ext+1+4*i, 4); + } + break; } } @@ -1258,6 +1293,7 @@ static int __init ic_dynamic(void) #ifdef CONFIG_PROC_FS +/* Name servers: */ static int pnp_seq_show(struct seq_file *seq, void *v) { int i; @@ -1294,6 +1330,62 @@ static const struct file_operations pnp_seq_fops = { .llseek = seq_lseek, .release = single_release, }; + +/* Create the /proc/net/ipconfig directory */ +static int __init ipconfig_proc_net_init(void) +{ + ipconfig_dir = proc_net_mkdir(&init_net, "ipconfig", init_net.proc_net); + if (!ipconfig_dir) + return -ENOMEM; + + return 0; +} + +/* Create a new file under /proc/net/ipconfig */ +static int ipconfig_proc_net_create(const char *name, + const struct file_operations *fops) +{ + char *pname; + struct proc_dir_entry *p; + + if (!ipconfig_dir) + return -ENOMEM; + + pname = kasprintf(GFP_KERNEL, "%s%s", "ipconfig/", name); + if (!pname) + return -ENOMEM; + + p = proc_create(pname, 0444, init_net.proc_net, fops); + kfree(pname); + if (!p) + return -ENOMEM; + + return 0; +} + +/* Write NTP server IP addresses to /proc/net/ipconfig/ntp_servers */ +static int ntp_servers_seq_show(struct seq_file *seq, void *v) +{ + int i; + + for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) { + if (ic_ntp_servers[i] != NONE) + seq_printf(seq, "%pI4\n", &ic_ntp_servers[i]); + } + return 0; +} + +static int ntp_servers_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, ntp_servers_seq_show, NULL); +} + +static const struct file_operations ntp_servers_seq_fops = { + .open = ntp_servers_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_PROC_FS */ /* @@ -1368,8 
+1460,20 @@ static int __init ip_auto_config(void) int err; unsigned int i; + /* Initialise all name servers and NTP servers to NONE (but only if the + * "ip=" or "nfsaddrs=" kernel command line parameters weren't decoded, + * otherwise we'll overwrite the IP addresses specified there) + */ + if (ic_set_manually == 0) { + ic_nameservers_predef(); + ic_ntp_servers_predef(); + } + #ifdef CONFIG_PROC_FS proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops); + + if (ipconfig_proc_net_init() == 0) + ipconfig_proc_net_create("ntp_servers", &ntp_servers_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) @@ -1481,16 +1585,32 @@ static int __init ip_auto_config(void) &ic_servaddr, &root_server_addr, root_server_path); if (ic_dev_mtu) pr_cont(", mtu=%d", ic_dev_mtu); - for (i = 0; i < CONF_NAMESERVERS_MAX; i++) + /* Name servers (if any): */ + for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { if (ic_nameservers[i] != NONE) { - pr_cont(" nameserver%u=%pI4", - i, &ic_nameservers[i]); - break; + if (i == 0) + pr_info(" nameserver%u=%pI4", + i, &ic_nameservers[i]); + else + pr_cont(", nameserver%u=%pI4", + i, &ic_nameservers[i]); } - for (i++; i < CONF_NAMESERVERS_MAX; i++) - if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); - pr_cont("\n"); + if (i + 1 == CONF_NAMESERVERS_MAX) + pr_cont("\n"); + } + /* NTP servers (if any): */ + for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) { + if (ic_ntp_servers[i] != NONE) { + if (i == 0) + pr_info(" ntpserver%u=%pI4", + i, &ic_ntp_servers[i]); + else + pr_cont(", ntpserver%u=%pI4", + i, &ic_ntp_servers[i]); + } + if (i + 1 == CONF_NTP_SERVERS_MAX) + pr_cont("\n"); + } #endif /* !SILENT */ /* @@ -1588,7 +1708,9 @@ static int __init ip_auto_config_setup(char *addrs) return 1; } + /* Initialise all name servers and NTP servers to NONE */ ic_nameservers_predef(); + ic_ntp_servers_predef(); /* Parse string for static IP assignment. 
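/*
 * Usage illustration, not part of the patch: with the above, NTP servers
 * can come from the new trailing <ntp0-ip> "ip=" field (case 9 in the
 * parser below) or from DHCP option 42, and are published read-only to
 * userspace. Addresses here are placeholders:
 *
 *   ip=10.0.0.2:10.0.0.1:10.0.0.254:255.255.255.0:client:eth0:off:10.0.0.53::10.0.0.123
 *
 * A minimal reader of the new proc file:
 */
#include <stdio.h>

int main(void)
{
	char addr[64];
	FILE *f = fopen("/proc/net/ipconfig/ntp_servers", "r");

	if (!f)
		return 1;	/* ipconfig did not run, or proc dir creation failed */
	while (fgets(addr, sizeof(addr), f))
		printf("NTP server: %s", addr);	/* one IPv4 address per line */
	fclose(f);
	return 0;
}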
*/ ip = addrs; @@ -1647,6 +1769,13 @@ static int __init ip_auto_config_setup(char *addrs) ic_nameservers[1] = NONE; } break; + case 9: + if (CONF_NTP_SERVERS_MAX >= 1) { + ic_ntp_servers[0] = in_aton(ip); + if (ic_ntp_servers[0] == ANY) + ic_ntp_servers[0] = NONE; + } + break; } } ip = cp; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2fb4de3f7f66..38e092eafc97 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -201,7 +201,8 @@ static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { }; static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct fib_rule_hdr *frh, struct nlattr **tb) + struct fib_rule_hdr *frh, struct nlattr **tb, + struct netlink_ext_ack *extack) { return 0; } diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c new file mode 100644 index 000000000000..5121c6475e6b --- /dev/null +++ b/net/ipv4/metrics.c @@ -0,0 +1,53 @@ +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/types.h> +#include <net/ip.h> +#include <net/net_namespace.h> +#include <net/tcp.h> + +int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, + u32 *metrics) +{ + bool ecn_ca = false; + struct nlattr *nla; + int remaining; + + if (!fc_mx) + return 0; + + nla_for_each_attr(nla, fc_mx, fc_mx_len, remaining) { + int type = nla_type(nla); + u32 val; + + if (!type) + continue; + if (type > RTAX_MAX) + return -EINVAL; + + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); + if (val == TCP_CA_UNSPEC) + return -EINVAL; + } else { + val = nla_get_u32(nla); + } + if (type == RTAX_ADVMSS && val > 65535 - 40) + val = 65535 - 40; + if (type == RTAX_MTU && val > 65535 - 15) + val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; + if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) + return -EINVAL; + metrics[type - 1] = val; + } + + if (ecn_ca) + metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + + return 0; +} +EXPORT_SYMBOL_GPL(ip_metrics_convert); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a058de677e94..261b71d0ccc5 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -296,6 +296,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL), SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS), + SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED), + SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c9d00ef54dec..62b776f90037 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1702,6 +1702,139 @@ int tcp_peek_len(struct socket *sock) } EXPORT_SYMBOL(tcp_peek_len); +/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */ +int tcp_set_rcvlowat(struct sock *sk, int val) +{ + sk->sk_rcvlowat = val ? 
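/*
 * Hedged sketch of the contract of the newly exported helper in
 * net/ipv4/metrics.c above: a prospective caller outside fib_semantics.c
 * (the nlattr pair would come from an RTA_METRICS nest) might look like
 * this; "mx"/"mx_len" mirror the fib_config fc_mx fields by assumption.
 */
#include <net/ip.h>

static int example_metrics(struct net *net, struct nlattr *mx, int mx_len)
{
	u32 metrics[RTAX_MAX] = {};
	int err;

	err = ip_metrics_convert(net, mx, mx_len, metrics);
	if (err)
		return err;	/* unknown RTAX_* type or bad RTAX_CC_ALGO name */

	/* metrics[RTAX_MTU - 1] etc. are now clamped and ready to install */
	return 0;
}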
: 1; + + /* Check if we need to signal EPOLLIN right now */ + tcp_data_ready(sk); + + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) + return 0; + + /* val comes from user space and might be close to INT_MAX */ + val <<= 1; + if (val < 0) + val = INT_MAX; + + val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + if (val > sk->sk_rcvbuf) { + sk->sk_rcvbuf = val; + tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val); + } + return 0; +} +EXPORT_SYMBOL(tcp_set_rcvlowat); + +#ifdef CONFIG_MMU +static const struct vm_operations_struct tcp_vm_ops = { +}; + +int tcp_mmap(struct file *file, struct socket *sock, + struct vm_area_struct *vma) +{ + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) + return -EPERM; + vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); + + /* Instruct vm_insert_page() to not down_read(mmap_sem) */ + vma->vm_flags |= VM_MIXEDMAP; + + vma->vm_ops = &tcp_vm_ops; + return 0; +} +EXPORT_SYMBOL(tcp_mmap); + +static int tcp_zerocopy_receive(struct sock *sk, + struct tcp_zerocopy_receive *zc) +{ + unsigned long address = (unsigned long)zc->address; + const skb_frag_t *frags = NULL; + u32 length = 0, seq, offset; + struct vm_area_struct *vma; + struct sk_buff *skb = NULL; + struct tcp_sock *tp; + int ret; + + if (address & (PAGE_SIZE - 1) || address != zc->address) + return -EINVAL; + + if (sk->sk_state == TCP_LISTEN) + return -ENOTCONN; + + sock_rps_record_flow(sk); + + down_read(¤t->mm->mmap_sem); + + ret = -EINVAL; + vma = find_vma(current->mm, address); + if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) + goto out; + zc->length = min_t(unsigned long, zc->length, vma->vm_end - address); + + tp = tcp_sk(sk); + seq = tp->copied_seq; + zc->length = min_t(u32, zc->length, tcp_inq(sk)); + zc->length &= ~(PAGE_SIZE - 1); + + zap_page_range(vma, address, zc->length); + + zc->recv_skip_hint = 0; + ret = 0; + while (length + PAGE_SIZE <= zc->length) { + if (zc->recv_skip_hint < PAGE_SIZE) { + if (skb) { + skb = skb->next; + offset = seq - TCP_SKB_CB(skb)->seq; + } else { + skb = tcp_recv_skb(sk, seq, &offset); + } + + zc->recv_skip_hint = skb->len - offset; + offset -= skb_headlen(skb); + if ((int)offset < 0 || skb_has_frag_list(skb)) + break; + frags = skb_shinfo(skb)->frags; + while (offset) { + if (frags->size > offset) + goto out; + offset -= frags->size; + frags++; + } + } + if (frags->size != PAGE_SIZE || frags->page_offset) + break; + ret = vm_insert_page(vma, address + length, + skb_frag_page(frags)); + if (ret) + break; + length += PAGE_SIZE; + seq += PAGE_SIZE; + zc->recv_skip_hint -= PAGE_SIZE; + frags++; + } +out: + up_read(¤t->mm->mmap_sem); + if (length) { + tp->copied_seq = seq; + tcp_rcv_space_adjust(sk); + + /* Clean up data we have read: This will do ACK frames. 
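/*
 * Userspace view of tcp_set_rcvlowat() above, illustration only:
 * SO_RCVLOWAT on TCP now defers poll()/epoll wakeups until enough bytes
 * are queued, and grows sk_rcvbuf/window_clamp so the target is reachable.
 */
#include <sys/socket.h>
#include <poll.h>

static int wait_for_block(int fd, int lowat)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	/* e.g. lowat = 64 * 1024: poll() stays quiet until ~64KB are
	 * queued (or EOF/error), cutting wakeups for bulk receivers.
	 */
	if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat)) < 0)
		return -1;
	return poll(&pfd, 1, -1);
}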
*/ + tcp_recv_skb(sk, seq, &offset); + tcp_cleanup_rbuf(sk, length); + ret = 0; + if (length == zc->length) + zc->recv_skip_hint = 0; + } else { + if (!zc->recv_skip_hint && sock_flag(sk, SOCK_DONE)) + ret = -EIO; + } + zc->length = length; + return ret; +} +#endif + static void tcp_update_recv_tstamps(struct sk_buff *skb, struct scm_timestamping *tss) { @@ -1757,6 +1890,22 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, } } +static int tcp_inq_hint(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + u32 copied_seq = READ_ONCE(tp->copied_seq); + u32 rcv_nxt = READ_ONCE(tp->rcv_nxt); + int inq; + + inq = rcv_nxt - copied_seq; + if (unlikely(inq < 0 || copied_seq != READ_ONCE(tp->copied_seq))) { + lock_sock(sk); + inq = tp->rcv_nxt - tp->copied_seq; + release_sock(sk); + } + return inq; +} + /* * This routine copies from a sock struct into the user buffer. * @@ -1773,13 +1922,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, u32 peek_seq; u32 *seq; unsigned long used; - int err; + int err, inq; int target; /* Read at least this many bytes */ long timeo; struct sk_buff *skb, *last; u32 urg_hole = 0; struct scm_timestamping tss; bool has_tss = false; + bool has_cmsg; if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); @@ -1794,6 +1944,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (sk->sk_state == TCP_LISTEN) goto out; + has_cmsg = tp->recvmsg_inq; timeo = sock_rcvtimeo(sk, nonblock); /* Urgent data needs to be handled specially. */ @@ -1980,6 +2131,7 @@ skip_copy: if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, &tss); has_tss = true; + has_cmsg = true; } if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; @@ -1999,13 +2151,20 @@ skip_copy: * on connected socket. I was just happy when found this 8) --ANK */ - if (has_tss) - tcp_recv_timestamp(msg, sk, &tss); - /* Clean up data we have read: This will do ACK frames. 
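/*
 * Userspace sketch of the receive flow implemented by
 * tcp_zerocopy_receive() above (buffer sizing is an assumption): map a
 * read-only region on the socket, then ask getsockopt() to install the
 * payload pages into it instead of copying.
 */
#include <stdint.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>	/* struct tcp_zerocopy_receive, TCP_ZEROCOPY_RECEIVE */

static ssize_t zc_receive(int fd, void *map, size_t map_len)
{
	struct tcp_zerocopy_receive zc = {
		.address = (uint64_t)(uintptr_t)map,	/* must be page aligned */
		.length  = map_len,
	};
	socklen_t len = sizeof(zc);

	/* "map" came from: mmap(NULL, map_len, PROT_READ, MAP_SHARED, fd, 0);
	 * tcp_mmap() rejects writable or executable mappings.
	 */
	if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &len) < 0)
		return -1;

	/* zc.length bytes are now readable at "map" without a copy;
	 * zc.recv_skip_hint bytes (non page-aligned skb tail) should be
	 * drained with a regular recv().
	 */
	return zc.length;
}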
*/ tcp_cleanup_rbuf(sk, copied); release_sock(sk); + + if (has_cmsg) { + if (has_tss) + tcp_recv_timestamp(msg, sk, &tss); + if (tp->recvmsg_inq) { + inq = tcp_inq_hint(sk); + put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); + } + } + return copied; out: @@ -2422,6 +2581,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; + tp->delivered_ce = 0; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); @@ -2873,6 +3033,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, tp->notsent_lowat = val; sk->sk_write_space(sk); break; + case TCP_INQ: + if (val > 1 || val < 0) + err = -EINVAL; + else + tp->recvmsg_inq = val; + break; default: err = -ENOPROTOOPT; break; @@ -3031,6 +3197,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) rate64 = tcp_compute_delivery_rate(tp); if (rate64) info->tcpi_delivery_rate = rate64; + info->tcpi_delivered = tp->delivered; + info->tcpi_delivered_ce = tp->delivered_ce; unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -3044,7 +3212,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) u32 rate; stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) + - 5 * nla_total_size(sizeof(u32)) + + 7 * nla_total_size(sizeof(u32)) + 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC); if (!stats) return NULL; @@ -3075,9 +3243,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); + nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); + nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); + return stats; } @@ -3293,6 +3464,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_NOTSENT_LOWAT: val = tp->notsent_lowat; break; + case TCP_INQ: + val = tp->recvmsg_inq; + break; case TCP_SAVE_SYN: val = tp->save_syn; break; @@ -3329,6 +3503,25 @@ static int do_tcp_getsockopt(struct sock *sk, int level, } return 0; } +#ifdef CONFIG_MMU + case TCP_ZEROCOPY_RECEIVE: { + struct tcp_zerocopy_receive zc; + int err; + + if (get_user(len, optlen)) + return -EFAULT; + if (len != sizeof(zc)) + return -EINVAL; + if (copy_from_user(&zc, optval, len)) + return -EFAULT; + lock_sock(sk); + err = tcp_zerocopy_receive(sk, &zc); + release_sock(sk); + if (!err && copy_to_user(optval, &zc, len)) + err = -EFAULT; + return err; + } +#endif default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e51c644484dc..b188e0d75edd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -111,6 +111,25 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ +#if IS_ENABLED(CONFIG_TLS_DEVICE) +static DEFINE_STATIC_KEY_FALSE(clean_acked_data_enabled); + +void clean_acked_data_enable(struct inet_connection_sock *icsk, + void (*cad)(struct sock *sk, u32 ack_seq)) +{ + icsk->icsk_clean_acked = cad; + static_branch_inc(&clean_acked_data_enabled); +} +EXPORT_SYMBOL_GPL(clean_acked_data_enable); + +void clean_acked_data_disable(struct inet_connection_sock *icsk) +{ + 
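/*
 * Userspace sketch for the TCP_INQ option added above (names from the
 * uapi change in this series): opt in once, then read the in-queue byte
 * count from the cmsg attached to each recvmsg().
 */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>	/* TCP_INQ, TCP_CM_INQ */

static ssize_t recv_with_inq(int fd, char *buf, size_t len, int *inq)
{
	char control[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = control, .msg_controllen = sizeof(control),
	};
	struct cmsghdr *cm;
	int one = 1;
	ssize_t ret;

	/* done once after socket creation in a real program;
	 * IPPROTO_TCP == SOL_TCP, the level put_cmsg() uses above
	 */
	setsockopt(fd, IPPROTO_TCP, TCP_INQ, &one, sizeof(one));

	ret = recvmsg(fd, &msg, 0);
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
		if (cm->cmsg_level == IPPROTO_TCP && cm->cmsg_type == TCP_CM_INQ)
			memcpy(inq, CMSG_DATA(cm), sizeof(*inq));
	return ret;	/* *inq = bytes still queued after this read */
}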
static_branch_dec(&clean_acked_data_enabled); + icsk->icsk_clean_acked = NULL; +} +EXPORT_SYMBOL_GPL(clean_acked_data_disable); +#endif + static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb, unsigned int len) { @@ -582,6 +601,8 @@ void tcp_rcv_space_adjust(struct sock *sk) u32 copied; int time; + trace_tcp_rcv_space_adjust(sk); + tcp_mstamp_refresh(tp); time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) @@ -3496,6 +3517,22 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit) tcp_xmit_retransmit_queue(sk); } +/* Returns the number of packets newly acked or sacked by the current ACK */ +static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag) +{ + const struct net *net = sock_net(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 delivered; + + delivered = tp->delivered - prior_delivered; + NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered); + if (flag & FLAG_ECE) { + tp->delivered_ce += delivered; + NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered); + } + return delivered; +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3542,6 +3579,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; icsk->icsk_retransmits = 0; + +#if IS_ENABLED(CONFIG_TLS_DEVICE) + if (static_branch_unlikely(&clean_acked_data_enabled)) + if (icsk->icsk_clean_acked) + icsk->icsk_clean_acked(sk, ack); +#endif } prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; @@ -3619,7 +3662,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk); - delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ + delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); @@ -3629,9 +3672,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ - if (flag & FLAG_DSACKING_ACK) + if (flag & FLAG_DSACKING_ACK) { tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, &rexmit); + tcp_newly_delivered(sk, delivered, flag); + } /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. 
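/*
 * The delivered/delivered_ce counters accounted by tcp_newly_delivered()
 * above surface via TCP_INFO (field names taken from this series' uapi
 * update) and via TCPDelivered/TCPDeliveredCE in /proc/net/netstat.
 * A minimal TCP_INFO read, illustration only:
 */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>

static void print_delivered(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0 &&
	    len >= sizeof(ti))	/* older kernels return a shorter struct */
		printf("delivered=%u delivered_ce=%u\n",
		       ti.tcpi_delivered, ti.tcpi_delivered_ce);
}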
@@ -3655,6 +3700,7 @@ old_ack: &sack_state); tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, &rexmit); + tcp_newly_delivered(sk, delivered, flag); tcp_xmit_recovery(sk, rexmit); } @@ -4573,6 +4619,17 @@ err: } +void tcp_data_ready(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + int avail = tp->rcv_nxt - tp->copied_seq; + + if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE)) + return; + + sk->sk_data_ready(sk); +} + static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -4630,7 +4687,7 @@ queue_and_out: if (eaten > 0) kfree_skb_partial(skb, fragstolen); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + tcp_data_ready(sk); return; } @@ -5023,9 +5080,12 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && /* ... and right edge of window advances far enough. - * (tcp_recvmsg() will send ACK otherwise). Or... + * (tcp_recvmsg() will send ACK otherwise). + * If application uses SO_RCVLOWAT, we want send ack now if + * we have not received enough bytes to satisfy the condition. */ - __tcp_select_window(sk) >= tp->rcv_wnd) || + (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || + __tcp_select_window(sk) >= tp->rcv_wnd)) || /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* We have out of order data. */ @@ -5428,7 +5488,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, no_ack: if (eaten) kfree_skb_partial(skb, fragstolen); - sk->sk_data_ready(sk); + tcp_data_ready(sk); return; } } @@ -5550,9 +5610,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, return true; } tp->syn_data_acked = tp->syn_data; - if (tp->syn_data_acked) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPFASTOPENACTIVE); + if (tp->syn_data_acked) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + /* SYN-data is counted as two separate packets in tcp_ack() */ + if (tp->delivered > 1) + --tp->delivered; + } tcp_fastopen_add_skb(sk, synack); @@ -5884,6 +5947,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } switch (sk->sk_state) { case TCP_SYN_RECV: + tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */ if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 383cac0ff0ec..d07c0dcc99aa 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -229,11 +229,9 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, } } - if (mss > (1 << *rcv_wscale)) { - if (!init_rcv_wnd) /* Use default unless specified otherwise */ - init_rcv_wnd = tcp_default_init_rwnd(mss); - *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); - } + if (!init_rcv_wnd) /* Use default unless specified otherwise */ + init_rcv_wnd = tcp_default_init_rwnd(mss); + *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); /* Set the clamp no higher than max representable value */ (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp); @@ -585,14 +583,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, unsigned int remaining = MAX_TCP_OPTION_SPACE; struct tcp_fastopen_request *fastopen = tp->fastopen_req; + *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (*md5) { - opts->options |= OPTION_MD5; - remaining -= TCPOLEN_MD5SIG_ALIGNED; + if (unlikely(rcu_access_pointer(tp->md5sig_info))) { + *md5 = 
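/*
 * Context for the md5_lookup change below, sketch only: the per-packet
 * lookup is now skipped unless a key was actually installed, i.e. unless
 * userspace did something like this (peer address is a placeholder):
 */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>	/* struct tcp_md5sig, TCP_MD5SIG */

static int add_md5_key(int fd, const struct sockaddr_in *peer,
		       const void *key, int keylen)
{
	struct tcp_md5sig sig;

	memset(&sig, 0, sizeof(sig));
	memcpy(&sig.tcpm_addr, peer, sizeof(*peer));
	sig.tcpm_keylen = keylen;	/* <= TCP_MD5SIG_MAXKEYLEN */
	memcpy(sig.tcpm_key, key, keylen);

	/* Once this succeeds, tp->md5sig_info is non-NULL and the
	 * rcu_access_pointer() fast path below takes the slow branch.
	 */
	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &sig, sizeof(sig));
}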
tp->af_specific->md5_lookup(sk, sk); + if (*md5) { + opts->options |= OPTION_MD5; + remaining -= TCPOLEN_MD5SIG_ALIGNED; + } } -#else - *md5 = NULL; #endif /* We always get an MSS option. The option bytes which will be seen in @@ -720,14 +719,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb opts->options = 0; + *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (unlikely(*md5)) { - opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; + if (unlikely(rcu_access_pointer(tp->md5sig_info))) { + *md5 = tp->af_specific->md5_lookup(sk, sk); + if (*md5) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; + } } -#else - *md5 = NULL; #endif if (likely(tp->rx_opt.tstamp_ok)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 24b5c59b1c53..dd3102a37ef9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -757,7 +757,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, } EXPORT_SYMBOL(udp_set_csum); -static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) +static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, + struct inet_cork *cork) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); @@ -777,6 +778,24 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->len = htons(len); uh->check = 0; + if (cork->gso_size) { + const int hlen = skb_network_header_len(skb) + + sizeof(struct udphdr); + + if (hlen + cork->gso_size > cork->fragsize) + return -EINVAL; + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (sk->sk_no_check_tx) + return -EINVAL; + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite) + return -EIO; + + skb_shinfo(skb)->gso_size = cork->gso_size; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; + goto csum_partial; + } + if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); @@ -786,6 +805,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ +csum_partial: udp4_hwcsum(skb, fl4->saddr, fl4->daddr); goto send; @@ -828,7 +848,7 @@ int udp_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_send_skb(skb, fl4); + err = udp_send_skb(skb, fl4, &inet->cork.base); out: up->len = 0; @@ -837,6 +857,43 @@ out: } EXPORT_SYMBOL(udp_push_pending_frames); +static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size) +{ + switch (cmsg->cmsg_type) { + case UDP_SEGMENT: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16))) + return -EINVAL; + *gso_size = *(__u16 *)CMSG_DATA(cmsg); + return 0; + default: + return -EINVAL; + } +} + +int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size) +{ + struct cmsghdr *cmsg; + bool need_ip = false; + int err; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + if (cmsg->cmsg_level != SOL_UDP) { + need_ip = true; + continue; + } + + err = __udp_cmsg_send(cmsg, gso_size); + if (err) + return err; + } + + return need_ip; +} +EXPORT_SYMBOL_GPL(udp_cmsg_send); + int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); @@ -922,10 +979,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; + ipc.gso_size = up->gso_size; if (msg->msg_controllen) { - err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); - if (unlikely(err)) { + err = udp_cmsg_send(sk, msg, &ipc.gso_size); + if (err > 
0) + err = ip_cmsg_send(sk, msg, &ipc, + sk->sk_family == AF_INET6); + if (unlikely(err < 0)) { kfree(ipc.opt); return err; } @@ -1030,12 +1091,14 @@ back_from_confirm: /* Lockless fast path for the non-corking case. */ if (!corkreq) { + struct inet_cork cork; + skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, sizeof(struct udphdr), &ipc, &rt, - msg->msg_flags); + &cork, msg->msg_flags); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_send_skb(skb, fl4); + err = udp_send_skb(skb, fl4, &cork); goto out; } @@ -2365,6 +2428,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, up->no_check6_rx = valbool; break; + case UDP_SEGMENT: + if (val < 0 || val > USHRT_MAX) + return -EINVAL; + up->gso_size = val; + break; + /* * UDP-Lite's partial checksum coverage (RFC 3828). */ @@ -2455,6 +2524,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = up->no_check6_rx; break; + case UDP_SEGMENT: + val = up->gso_size; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index ea6e6e7df0ee..006257092f06 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -187,6 +187,68 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); +struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, + netdev_features_t features, + unsigned int mss, __sum16 check) +{ + struct sock *sk = gso_skb->sk; + unsigned int sum_truesize = 0; + struct sk_buff *segs, *seg; + unsigned int hdrlen; + struct udphdr *uh; + + if (gso_skb->len <= sizeof(*uh) + mss) + return ERR_PTR(-EINVAL); + + hdrlen = gso_skb->data - skb_mac_header(gso_skb); + skb_pull(gso_skb, sizeof(*uh)); + + /* clear destructor to avoid skb_segment assigning it to tail */ + WARN_ON_ONCE(gso_skb->destructor != sock_wfree); + gso_skb->destructor = NULL; + + segs = skb_segment(gso_skb, features); + if (unlikely(IS_ERR_OR_NULL(segs))) { + gso_skb->destructor = sock_wfree; + return segs; + } + + for (seg = segs; seg; seg = seg->next) { + uh = udp_hdr(seg); + uh->len = htons(seg->len - hdrlen); + uh->check = check; + + /* last packet can be partial gso_size */ + if (!seg->next) + csum_replace2(&uh->check, htons(mss), + htons(seg->len - hdrlen - sizeof(*uh))); + + uh->check = ~uh->check; + seg->destructor = sock_wfree; + seg->sk = sk; + sum_truesize += seg->truesize; + } + + refcount_add(sum_truesize - gso_skb->truesize, &sk->sk_wmem_alloc); + + return segs; +} +EXPORT_SYMBOL_GPL(__udp_gso_segment); + +static struct sk_buff *__udp4_gso_segment(struct sk_buff *gso_skb, + netdev_features_t features) +{ + const struct iphdr *iph = ip_hdr(gso_skb); + unsigned int mss = skb_shinfo(gso_skb)->gso_size; + + if (!can_checksum_protocol(features, htons(ETH_P_IP))) + return ERR_PTR(-EIO); + + return __udp_gso_segment(gso_skb, features, mss, + udp_v4_check(sizeof(struct udphdr) + mss, + iph->saddr, iph->daddr, 0)); +} + static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { @@ -203,12 +265,15 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4))) goto out; if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + return __udp4_gso_segment(skb, features); + mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) 
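/*
 * Userspace sketch for UDP_SEGMENT as parsed by __udp_cmsg_send() above:
 * one large sendmsg() payload is segmented by the stack into gso_size
 * sized datagrams. Destination setup is omitted; a 1400-byte segment
 * size is an assumption (headers plus gso_size must fit the path MTU).
 * The fallback defines guard against pre-series headers.
 */
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/udp.h>

#ifndef SOL_UDP
#define SOL_UDP 17
#endif
#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103
#endif

static ssize_t send_udp_gso(int fd, const void *buf, size_t len,
			    uint16_t gso_size)
{
	char control[CMSG_SPACE(sizeof(uint16_t))] = {};
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = control, .msg_controllen = sizeof(control),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

	cm->cmsg_level = SOL_UDP;
	cm->cmsg_type = UDP_SEGMENT;
	cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));	/* exactly what __udp_cmsg_send() checks */
	memcpy(CMSG_DATA(cm), &gso_size, sizeof(gso_size));	/* e.g. 1400 */

	/* Alternatively, set a per-socket default once:
	 *   setsockopt(fd, SOL_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size));
	 */
	return sendmsg(fd, &msg, 0);
}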
goto out; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 78cef00c9596..fbfd71a2d9c8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -170,7 +170,7 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event); static int addrconf_ifdown(struct net_device *dev, int how); -static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, +static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, u32 flags, u32 noflags); @@ -916,7 +916,6 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warn("Freeing alive inet6 address %p\n", ifp); return; } - ip6_rt_put(ifp->rt); kfree_rcu(ifp, rcu); } @@ -995,7 +994,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC; struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; - struct rt6_info *rt = NULL; + struct fib6_info *f6i = NULL; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -1037,16 +1036,16 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out; } - rt = addrconf_dst_alloc(idev, addr, false); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - rt = NULL; + f6i = addrconf_f6i_alloc(net, idev, addr, false, gfp_flags); + if (IS_ERR(f6i)) { + err = PTR_ERR(f6i); + f6i = NULL; goto out; } if (net->ipv6.devconf_all->disable_policy || idev->cnf.disable_policy) - rt->dst.flags |= DST_NOPOLICY; + f6i->dst_nopolicy = true; neigh_parms_data_state_setall(idev->nd_parms); @@ -1068,7 +1067,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->cstamp = ifa->tstamp = jiffies; ifa->tokenized = false; - ifa->rt = rt; + ifa->rt = f6i; ifa->idev = idev; in6_dev_hold(idev); @@ -1102,8 +1101,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, inet6addr_notifier_call_chain(NETDEV_UP, ifa); out: if (unlikely(err < 0)) { - if (rt) - ip6_rt_put(rt); + fib6_info_release(f6i); + if (ifa) { if (ifa->idev) in6_dev_put(ifa->idev); @@ -1179,19 +1178,19 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) static void cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt) { - struct rt6_info *rt; + struct fib6_info *f6i; - rt = addrconf_get_prefix_route(&ifp->addr, + f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, 0, RTF_GATEWAY | RTF_DEFAULT); - if (rt) { + if (f6i) { if (del_rt) - ip6_del_rt(rt); + ip6_del_rt(dev_net(ifp->idev->dev), f6i); else { - if (!(rt->rt6i_flags & RTF_EXPIRES)) - rt6_set_expires(rt, expires); - ip6_rt_put(rt); + if (!(f6i->fib6_flags & RTF_EXPIRES)) + fib6_set_expires(f6i, expires); + fib6_info_release(f6i); } } } @@ -2320,7 +2319,7 @@ static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad static void addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, - unsigned long expires, u32 flags) + unsigned long expires, u32 flags, gfp_t gfp_flags) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? 
: RT6_TABLE_PREFIX, @@ -2331,6 +2330,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, .fc_flags = RTF_UP | flags, .fc_nlinfo.nl_net = dev_net(dev), .fc_protocol = RTPROT_KERNEL, + .fc_type = RTN_UNICAST, }; cfg.fc_dst = *pfx; @@ -2344,17 +2344,17 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, cfg.fc_flags |= RTF_NONEXTHOP; #endif - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, gfp_flags, NULL); } -static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, +static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, u32 flags, u32 noflags) { struct fib6_node *fn; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct fib6_table *table; u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; @@ -2368,14 +2368,13 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->dst.dev->ifindex != dev->ifindex) + if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex) continue; - if ((rt->rt6i_flags & flags) != flags) + if ((rt->fib6_flags & flags) != flags) continue; - if ((rt->rt6i_flags & noflags) != 0) + if ((rt->fib6_flags & noflags) != 0) continue; - if (!dst_hold_safe(&rt->dst)) - rt = NULL; + fib6_info_hold(rt); break; } out: @@ -2394,12 +2393,13 @@ static void addrconf_add_mroute(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 8, .fc_flags = RTF_UP, + .fc_type = RTN_UNICAST, .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, GFP_ATOMIC, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) @@ -2529,7 +2529,6 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, if (IS_ERR_OR_NULL(ifp)) return -1; - update_lft = 0; create = 1; spin_lock_bh(&ifp->lock); ifp->flags |= IFA_F_MANAGETEMPADDR; @@ -2551,7 +2550,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!update_lft && !create && stored_lft) { + if (!create && stored_lft) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); @@ -2642,7 +2641,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) */ if (pinfo->onlink) { - struct rt6_info *rt; + struct fib6_info *rt; unsigned long rt_expires; /* Avoid arithmetic overflow. 
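/*
 * Sketch of the reference-counting contract implied by the fib6_info
 * conversion here, not taken verbatim from the patch: lookups such as
 * addrconf_get_prefix_route() now return with a reference taken via
 * fib6_info_hold(), and every caller must balance it.
 */
static void example_drop_prefix_route(struct net *net,
				      struct inet6_ifaddr *ifp)
{
	struct fib6_info *f6i;

	f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
					ifp->idev->dev, 0, 0);
	if (!f6i)
		return;

	/* Either unlink the route (ip6_del_rt() consumes the reference): */
	ip6_del_rt(net, f6i);
	/* ...or, when keeping it, drop the reference instead:
	 *   fib6_info_release(f6i);
	 */
}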
Really, we could @@ -2667,13 +2666,13 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (rt) { /* Autoconf prefix route */ if (valid_lft == 0) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ - rt6_set_expires(rt, jiffies + rt_expires); + fib6_set_expires(rt, jiffies + rt_expires); } else { - rt6_clean_expires(rt); + fib6_clean_expires(rt); } } else if (valid_lft) { clock_t expires = 0; @@ -2684,9 +2683,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) expires = jiffies_to_clock_t(rt_expires); } addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, - dev, expires, flags); + dev, expires, flags, GFP_ATOMIC); } - ip6_rt_put(rt); + fib6_info_release(rt); } /* Try to figure out our local address for this prefix */ @@ -2899,9 +2898,14 @@ static int inet6_addr_add(struct net *net, int ifindex, if (!IS_ERR(ifp)) { if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - expires, flags); + expires, flags, GFP_KERNEL); } + /* Send a netlink notification if DAD is enabled and + * optimistic flag is not set + */ + if (!(ifp->flags & (IFA_F_OPTIMISTIC | IFA_F_NODAD))) + ipv6_ifa_notify(0, ifp); /* * Note that section 3.1 of RFC 4429 indicates * that the Optimistic flag should not be set for @@ -3047,7 +3051,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) if (addr.s6_addr32[3]) { add_addr(idev, &addr, plen, scope); - addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags); + addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags, + GFP_ATOMIC); return; } @@ -3072,7 +3077,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) add_addr(idev, &addr, plen, flag); addrconf_prefix_route(&addr, plen, idev->dev, 0, - pflags); + pflags, GFP_ATOMIC); } } } @@ -3112,7 +3117,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL); if (!IS_ERR(ifp)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, + 0, 0, GFP_ATOMIC); addrconf_dad_start(ifp); in6_ifa_put(ifp); } @@ -3227,7 +3233,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) addrconf_add_linklocal(idev, &addr, IFA_F_STABLE_PRIVACY); else if (prefix_route) - addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + addrconf_prefix_route(&addr, 64, idev->dev, + 0, 0, GFP_KERNEL); break; case IN6_ADDR_GEN_MODE_EUI64: /* addrconf_add_linklocal also adds a prefix_route and we @@ -3237,7 +3244,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) - addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + addrconf_prefix_route(&addr, 64, idev->dev, + 0, 0, GFP_KERNEL); break; case IN6_ADDR_GEN_MODE_NONE: default: @@ -3329,32 +3337,34 @@ static void addrconf_gre_config(struct net_device *dev) } #endif -static int fixup_permanent_addr(struct inet6_dev *idev, +static int fixup_permanent_addr(struct net *net, + struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - /* !rt6i_node means the host route was removed from the + /* !fib6_node means the host route was removed from the * FIB, for example, if 'lo' device is taken down. In that * case regenerate the host route. 
*/ - if (!ifp->rt || !ifp->rt->rt6i_node) { - struct rt6_info *rt, *prev; + if (!ifp->rt || !ifp->rt->fib6_node) { + struct fib6_info *f6i, *prev; - rt = addrconf_dst_alloc(idev, &ifp->addr, false); - if (IS_ERR(rt)) - return PTR_ERR(rt); + f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false, + GFP_ATOMIC); + if (IS_ERR(f6i)) + return PTR_ERR(f6i); /* ifp->rt can be accessed outside of rtnl */ spin_lock(&ifp->lock); prev = ifp->rt; - ifp->rt = rt; + ifp->rt = f6i; spin_unlock(&ifp->lock); - ip6_rt_put(prev); + fib6_info_release(prev); } if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, - idev->dev, 0, 0); + idev->dev, 0, 0, GFP_ATOMIC); } if (ifp->state == INET6_IFADDR_STATE_PREDAD) @@ -3363,7 +3373,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev, return 0; } -static void addrconf_permanent_addr(struct net_device *dev) +static void addrconf_permanent_addr(struct net *net, struct net_device *dev) { struct inet6_ifaddr *ifp, *tmp; struct inet6_dev *idev; @@ -3376,7 +3386,7 @@ static void addrconf_permanent_addr(struct net_device *dev) list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { if ((ifp->flags & IFA_F_PERMANENT) && - fixup_permanent_addr(idev, ifp) < 0) { + fixup_permanent_addr(net, idev, ifp) < 0) { write_unlock_bh(&idev->lock); in6_ifa_hold(ifp); ipv6_del_addr(ifp); @@ -3445,7 +3455,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (event == NETDEV_UP) { /* restore routes for permanent addresses */ - addrconf_permanent_addr(dev); + addrconf_permanent_addr(net, dev); if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ @@ -3612,8 +3622,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa, *tmp; - int _keep_addr; - bool keep_addr; + bool keep_addr = false; int state, i; ASSERT_RTNL(); @@ -3639,15 +3648,18 @@ static int addrconf_ifdown(struct net_device *dev, int how) } - /* aggregate the system setting and interface setting */ - _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; - if (!_keep_addr) - _keep_addr = idev->cnf.keep_addr_on_down; - /* combine the user config with event to determine if permanent * addresses are to be removed from address hash table */ - keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6); + if (!how && !idev->cnf.disable_ipv6) { + /* aggregate the system setting and interface setting */ + int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; + + if (!_keep_addr) + _keep_addr = idev->cnf.keep_addr_on_down; + + keep_addr = (_keep_addr > 0); + } /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { @@ -3697,13 +3709,8 @@ restart: write_lock_bh(&idev->lock); } - /* re-combine the user config with event to determine if permanent - * addresses are to be removed from the interface list - */ - keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); - list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; bool keep; addrconf_del_dad_work(ifa); @@ -3731,7 +3738,7 @@ restart: spin_unlock_bh(&ifa->lock); if (rt) - ip6_del_rt(rt); + ip6_del_rt(net, rt); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); @@ -3849,6 +3856,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; bool bump_id, notify = false; + struct net *net; addrconf_join_solict(dev, 
&ifp->addr); @@ -3859,8 +3867,9 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) if (ifp->state == INET6_IFADDR_STATE_DEAD) goto out; + net = dev_net(dev); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 && + (net->ipv6.devconf_all->accept_dad < 1 && idev->cnf.accept_dad < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { @@ -3896,8 +3905,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) * Frames right away */ if (ifp->flags & IFA_F_OPTIMISTIC) { - ip6_ins_rt(ifp->rt); - if (ipv6_use_optimistic_addr(dev_net(dev), idev)) { + ip6_ins_rt(net, ifp->rt); + if (ipv6_use_optimistic_addr(net, idev)) { /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ @@ -4563,8 +4572,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, ipv6_ifa_notify(0, ifp); if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - expires, flags); + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, + ifp->idev->dev, expires, flags, + GFP_KERNEL); } else if (had_prefixroute) { enum cleanup_prefix_rt_t action; unsigned long rt_expires; @@ -4804,9 +4814,10 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, u32 portid, u32 seq, int event, unsigned int flags) { + struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt); + int ifindex = dev ? dev->ifindex : 1; struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; - int ifindex = ifaca->aca_idev->dev->ifindex; if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; @@ -5029,14 +5040,6 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) struct net *net = dev_net(ifa->idev->dev); int err = -ENOBUFS; - /* Don't send DELADDR notification for TENTATIVE address, - * since NEWADDR notification is sent only after removing - * TENTATIVE flag, if DAD has not failed. 
- */ - if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) && - event == RTM_DELADDR) - return; - skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); if (!skb) goto errout; @@ -5607,29 +5610,30 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) * our DAD process, so we don't need * to do it again */ - if (!rcu_access_pointer(ifp->rt->rt6i_node)) - ip6_ins_rt(ifp->rt); + if (!rcu_access_pointer(ifp->rt->fib6_node)) + ip6_ins_rt(net, ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); if (!ipv6_addr_any(&ifp->peer_addr)) addrconf_prefix_route(&ifp->peer_addr, 128, - ifp->idev->dev, 0, 0); + ifp->idev->dev, 0, 0, + GFP_ATOMIC); break; case RTM_DELADDR: if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); if (!ipv6_addr_any(&ifp->peer_addr)) { - struct rt6_info *rt; + struct fib6_info *rt; rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, ifp->idev->dev, 0, 0); if (rt) - ip6_del_rt(rt); + ip6_del_rt(net, rt); } if (ifp->rt) { - if (dst_hold_safe(&ifp->rt->dst)) - ip6_del_rt(ifp->rt); + ip6_del_rt(net, ifp->rt); + ifp->rt = NULL; } rt_genid_bump_ipv6(net); break; @@ -5976,11 +5980,11 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) list_for_each_entry(ifa, &idev->addr_list, if_list) { spin_lock(&ifa->lock); if (ifa->rt) { - struct rt6_info *rt = ifa->rt; + struct fib6_info *rt = ifa->rt; int cpu; rcu_read_lock(); - addrconf_set_nopolicy(ifa->rt, val); + ifa->rt->dst_nopolicy = val ? true : false; if (rt->rt6i_pcpu) { for_each_possible_cpu(cpu) { struct rt6_info **rtp; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8da0b513f188..d0af96e0d109 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -273,33 +273,8 @@ out_rcu_unlock: goto out; } - -/* bind for INET6 API */ -int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) -{ - struct sock *sk = sock->sk; - int err = 0; - - /* If the socket has its own bind function then use it. */ - if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); - - if (addr_len < SIN6_LEN_RFC2133) - return -EINVAL; - - /* BPF prog is run before any checks are done so that if the prog - * changes context in a wrong way it will be caught. - */ - err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr); - if (err) - return err; - - return __inet6_bind(sk, uaddr, addr_len, false, true); -} -EXPORT_SYMBOL(inet6_bind); - -int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock) +static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct inet_sock *inet = inet_sk(sk); @@ -444,6 +419,30 @@ out_unlock: goto out; } +/* bind for INET6 API */ +int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sock *sk = sock->sk; + int err = 0; + + /* If the socket has its own bind function then use it. */ + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, uaddr, addr_len); + + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + /* BPF prog is run before any checks are done so that if the prog + * changes context in a wrong way it will be caught. 
+ */ + err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr); + if (err) + return err; + + return __inet6_bind(sk, uaddr, addr_len, false, true); +} +EXPORT_SYMBOL(inet6_bind); + int inet6_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -579,7 +578,9 @@ const struct proto_ops inet6_stream_ops = { .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet_sendmsg, /* ok */ .recvmsg = inet_recvmsg, /* ok */ - .mmap = sock_no_mmap, +#ifdef CONFIG_MMU + .mmap = tcp_mmap, +#endif .sendpage = inet_sendpage, .sendmsg_locked = tcp_sendmsg_locked, .sendpage_locked = tcp_sendpage_locked, @@ -590,6 +591,7 @@ const struct proto_ops inet6_stream_ops = { .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif + .set_rcvlowat = tcp_set_rcvlowat, }; const struct proto_ops inet6_dgram_ops = { diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index bbcabbba9bd8..0250d199e527 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -212,16 +212,14 @@ static void aca_get(struct ifacaddr6 *aca) static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) { - in6_dev_put(ac->aca_idev); - dst_release(&ac->aca_rt->dst); + fib6_info_release(ac->aca_rt); kfree(ac); } } -static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, +static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, const struct in6_addr *addr) { - struct inet6_dev *idev = rt->rt6i_idev; struct ifacaddr6 *aca; aca = kzalloc(sizeof(*aca), GFP_ATOMIC); @@ -229,9 +227,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, return NULL; aca->aca_addr = *addr; - in6_dev_hold(idev); - aca->aca_idev = idev; - aca->aca_rt = rt; + fib6_info_hold(f6i); + aca->aca_rt = f6i; aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; @@ -246,7 +243,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca; - struct rt6_info *rt; + struct fib6_info *f6i; + struct net *net; int err; ASSERT_RTNL(); @@ -265,14 +263,15 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) } } - rt = addrconf_dst_alloc(idev, addr, true); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); + net = dev_net(idev->dev); + f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC); + if (IS_ERR(f6i)) { + err = PTR_ERR(f6i); goto out; } - aca = aca_alloc(rt, addr); + aca = aca_alloc(f6i, addr); if (!aca) { - ip6_rt_put(rt); + fib6_info_release(f6i); err = -ENOMEM; goto out; } @@ -286,7 +285,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) aca_get(aca); write_unlock_bh(&idev->lock); - ip6_ins_rt(rt); + ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); @@ -328,8 +327,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->dst); - ip6_del_rt(aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt); aca_put(aca); return 0; @@ -356,8 +354,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->dst); - ip6_del_rt(aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt); aca_put(aca); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index bc68eb661970..5bc2bf3733ab 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -280,6 +280,7 @@ static const struct tlvtype_proc 
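[Note on the inet6_stream_ops hunk above: IPv6 TCP sockets are wired up to the two receive-path additions from this series, tcp_mmap (the mmap()-based receive path, only under CONFIG_MMU) and tcp_set_rcvlowat, which lets SO_RCVLOWAT actually defer reader wakeups on TCP. A minimal sketch of the userspace side of the latter; this is the standard sockets API, nothing here is new ABI:]

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        int fd = socket(AF_INET6, SOCK_STREAM, 0);
        int lowat = 64 * 1024;

        if (fd < 0)
                return 1;
        /* With .set_rcvlowat wired to tcp_set_rcvlowat, poll()/read()
         * should not report this socket readable until roughly 64KB
         * of payload are queued. */
        if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT,
                       &lowat, sizeof(lowat)) < 0)
                perror("SO_RCVLOWAT");
        close(fd);
        return 0;
}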
tlvprocdestopt_lst[] = { static int ipv6_destopt_rcv(struct sk_buff *skb) { + struct inet6_dev *idev = __in6_dev_get(skb->dev); struct inet6_skb_parm *opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) __u16 dstbuf; @@ -291,7 +292,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + __IP6_INC_STATS(dev_net(dst->dev), idev, IPSTATS_MIB_INHDRERRORS); fail_and_free: kfree_skb(skb); @@ -319,8 +320,7 @@ fail_and_free: return 1; } - __IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); return -1; } @@ -416,8 +416,7 @@ looped_back: } if (hdr->segments_left >= (hdr->hdrlen >> 1)) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb))); @@ -456,8 +455,7 @@ looped_back: if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); kfree_skb(skb); @@ -481,10 +479,10 @@ looped_back: /* called with rcu_read_lock() */ static int ipv6_rthdr_rcv(struct sk_buff *skb) { + struct inet6_dev *idev = __in6_dev_get(skb->dev); struct inet6_skb_parm *opt = IP6CB(skb); struct in6_addr *addr = NULL; struct in6_addr daddr; - struct inet6_dev *idev; int n, i; struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; @@ -498,8 +496,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -508,8 +505,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -527,7 +523,7 @@ looped_back: * processed by own */ if (!addr) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -553,8 +549,7 @@ looped_back: goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } @@ -572,8 +567,7 @@ looped_back: n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb))); @@ -609,14 +603,12 @@ looped_back: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, 
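[A pattern worth calling out in the exthdrs.c conversion above, which repeats in ip6_input.c, ip6_output.c and reassembly.c later in this diff: MIB counters are now bumped against the inet6_dev of the receiving device, resolved once at function entry, instead of ip6_dst_idev(skb_dst(skb)), which depends on a dst that may be absent or rewritten mid-parse. A toy rendering of the guard pattern with stand-in types; the real __IP6_INC_STATS likewise tolerates a NULL idev:]

#include <stddef.h>

struct idev { unsigned long in_hdr_errors; };
struct netdev { struct idev *ip6_ptr; };

/* analogous to __in6_dev_get_safely(): NULL device yields NULL idev */
static struct idev *in6_dev_get_safely(const struct netdev *dev)
{
        return dev ? dev->ip6_ptr : NULL;
}

static void ip6_inc_inhdrerrors(struct idev *idev)
{
        if (idev)               /* mirrors the NULL check in _DEVINC() */
                idev->in_hdr_errors++;
}

int main(void)
{
        struct idev i = { 0 };
        struct netdev d = { &i };

        ip6_inc_inhdrerrors(in6_dev_get_safely(&d));
        ip6_inc_inhdrerrors(in6_dev_get_safely(NULL));  /* safe no-op */
        return (int)i.in_hdr_errors - 1;                /* 0 on success */
}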
idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -627,8 +619,7 @@ looped_back: } if (ipv6_addr_is_multicast(addr)) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } @@ -647,8 +638,7 @@ looped_back: if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); kfree_skb(skb); @@ -663,7 +653,7 @@ looped_back: return -1; unknown_rh: - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb)); return -1; @@ -755,34 +745,31 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); + struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); struct net *net = ipv6_skb_net(skb); u32 pkt_len; if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); - __IP6_INC_STATS(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { - __IP6_INC_STATS(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return false; } if (ipv6_hdr(skb)->payload_len) { - __IP6_INC_STATS(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return false; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - __IP6_INC_STATS(net, ipv6_skb_idev(skb), - IPSTATS_MIB_INTRUNCATEDPKTS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index df113c7b5fc8..6547fc6491a6 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -245,15 +245,18 @@ static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = { static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, - struct nlattr **tb) + struct nlattr **tb, + struct netlink_ext_ack *extack) { int err = -EINVAL; struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { - if (rule->table == RT6_TABLE_UNSPEC) + if (rule->table == RT6_TABLE_UNSPEC) { + NL_SET_ERR_MSG(extack, "Invalid table"); goto errout; + } if (fib6_new_table(net, rule->table) == NULL) { err = -ENOBUFS; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index deab2db6692e..6421c893466e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -43,7 +43,7 @@ static struct kmem_cache *fib6_node_kmem __read_mostly; struct fib6_cleaner { struct fib6_walker w; struct net *net; - int (*func)(struct rt6_info *, void *arg); + int (*func)(struct fib6_info *, 
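[Note on the fib6_rules.c hunk above: with the netlink_ext_ack argument threaded through fib6_rule_configure(), a rejected RTM_NEWRULE (table RT6_TABLE_UNSPEC) now carries a human-readable reason, "Invalid table", back to userspace instead of a bare -EINVAL. A requester only sees extack strings after opting in; a hedged sketch of that opt-in, with the usual uapi sockopt values guard-defined in case older headers lack them:]

#include <stdio.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <unistd.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
#ifndef NETLINK_EXT_ACK
#define NETLINK_EXT_ACK 11
#endif

int main(void)
{
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        int one = 1;

        if (fd < 0)
                return 1;
        if (setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK,
                       &one, sizeof(one)) < 0)
                perror("NETLINK_EXT_ACK");
        /* A subsequent bad RTM_NEWRULE would now come back with an
         * NLMSGERR_ATTR_MSG string attached to the error ack. */
        close(fd);
        return 0;
}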
void *arg); int sernum; void *arg; }; @@ -54,7 +54,7 @@ struct fib6_cleaner { #define FWS_INIT FWS_L #endif -static struct rt6_info *fib6_find_prefix(struct net *net, +static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, @@ -105,13 +105,12 @@ enum { FIB6_NO_SERNUM_CHANGE = 0, }; -void fib6_update_sernum(struct rt6_info *rt) +void fib6_update_sernum(struct net *net, struct fib6_info *f6i) { - struct net *net = dev_net(rt->dst.dev); struct fib6_node *fn; - fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + fn = rcu_dereference_protected(f6i->fib6_node, + lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) fn->fn_sernum = fib6_new_sernum(net); } @@ -146,6 +145,69 @@ static __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } +struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) +{ + struct fib6_info *f6i; + + f6i = kzalloc(sizeof(*f6i), gfp_flags); + if (!f6i) + return NULL; + + f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); + if (!f6i->rt6i_pcpu) { + kfree(f6i); + return NULL; + } + + INIT_LIST_HEAD(&f6i->fib6_siblings); + f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; + + atomic_inc(&f6i->fib6_ref); + + return f6i; +} + +void fib6_info_destroy(struct fib6_info *f6i) +{ + struct rt6_exception_bucket *bucket; + struct dst_metrics *m; + + WARN_ON(f6i->fib6_node); + + bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1); + if (bucket) { + f6i->rt6i_exception_bucket = NULL; + kfree(bucket); + } + + if (f6i->rt6i_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } + } + + if (f6i->fib6_nh.nh_dev) + dev_put(f6i->fib6_nh.nh_dev); + + m = f6i->fib6_metrics; + if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) + kfree(m); + + kfree(f6i); +} +EXPORT_SYMBOL_GPL(fib6_info_destroy); + static struct fib6_node *node_alloc(struct net *net) { struct fib6_node *fn; @@ -176,28 +238,6 @@ static void node_free(struct net *net, struct fib6_node *fn) net->ipv6.rt6_stats->fib_nodes--; } -void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) -{ - int cpu; - - if (!non_pcpu_rt->rt6i_pcpu) - return; - - for_each_possible_cpu(cpu) { - struct rt6_info **ppcpu_rt; - struct rt6_info *pcpu_rt; - - ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); - pcpu_rt = *ppcpu_rt; - if (pcpu_rt) { - dst_dev_put(&pcpu_rt->dst); - dst_release(&pcpu_rt->dst); - *ppcpu_rt = NULL; - } - } -} -EXPORT_SYMBOL_GPL(rt6_free_pcpu); - static void fib6_free_table(struct fib6_table *table) { inetpeer_invalidate_tree(&table->tb6_peers); @@ -232,7 +272,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) if (table) { table->tb6_id = id; rcu_assign_pointer(table->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); } @@ -340,7 +380,7 @@ unsigned int fib6_tables_seq_read(struct net *net) static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, - struct rt6_info *rt) + struct fib6_info *rt) { struct fib6_entry_notifier_info info = { .rt = rt, @@ -351,7 +391,7 @@ static int 
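[Note on fib6_info_alloc()/fib6_info_destroy() above, which establish the lifetime rules for the new FIB entry type: the object is born holding one reference (the atomic_inc on fib6_ref), every long-lived pointer takes fib6_info_hold(), and fib6_info_release() tears down the entry, its per-CPU dst cache and any private metrics once the count reaches zero. A compressed userspace model of just the refcounting contract; all types here are stand-ins:]

#include <stdatomic.h>
#include <stdlib.h>

struct fib6_info_model {
        atomic_int ref;
        /* ... route data ... */
};

static struct fib6_info_model *fib6_info_alloc_model(void)
{
        struct fib6_info_model *f = calloc(1, sizeof(*f));

        if (f)
                atomic_store(&f->ref, 1);   /* caller owns the first ref */
        return f;
}

static void fib6_info_hold_model(struct fib6_info_model *f)
{
        atomic_fetch_add(&f->ref, 1);
}

static void fib6_info_release_model(struct fib6_info_model *f)
{
        /* free on the 1 -> 0 transition, like fib6_info_destroy() */
        if (f && atomic_fetch_sub(&f->ref, 1) == 1)
                free(f);
}

int main(void)
{
        struct fib6_info_model *f = fib6_info_alloc_model();

        if (!f)
                return 1;
        fib6_info_hold_model(f);        /* e.g. a tree node's leaf pointer */
        fib6_info_release_model(f);
        fib6_info_release_model(f);     /* last ref: freed here */
        return 0;
}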
call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, static int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, - struct rt6_info *rt, + struct fib6_info *rt, struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { @@ -359,7 +399,7 @@ static int call_fib6_entry_notifiers(struct net *net, .rt = rt, }; - rt->rt6i_table->fib_seq++; + rt->fib6_table->fib_seq++; return call_fib6_notifiers(net, event_type, &info.info); } @@ -368,16 +408,16 @@ struct fib6_dump_arg { struct notifier_block *nb; }; -static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg) +static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { - if (rt == arg->net->ipv6.ip6_null_entry) + if (rt == arg->net->ipv6.fib6_null_entry) return; call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); } static int fib6_node_dump(struct fib6_walker *w) { - struct rt6_info *rt; + struct fib6_info *rt; for_each_fib6_walker_rt(w) fib6_rt_dump(rt, w->args); @@ -426,7 +466,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb) static int fib6_dump_node(struct fib6_walker *w) { int res; - struct rt6_info *rt; + struct fib6_info *rt; for_each_fib6_walker_rt(w) { res = rt6_dump_route(rt, w->args); @@ -441,10 +481,10 @@ static int fib6_dump_node(struct fib6_walker *w) * last sibling of this route (no need to dump the * sibling routes again) */ - if (rt->rt6i_nsiblings) - rt = list_last_entry(&rt->rt6i_siblings, - struct rt6_info, - rt6i_siblings); + if (rt->fib6_nsiblings) + rt = list_last_entry(&rt->fib6_siblings, + struct fib6_info, + fib6_siblings); } w->leaf = NULL; return 0; @@ -579,6 +619,24 @@ out: return res; } +void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) +{ + if (!f6i) + return; + + if (f6i->fib6_metrics == &dst_default_metrics) { + struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC); + + if (!p) + return; + + refcount_set(&p->refcnt, 1); + f6i->fib6_metrics = p; + } + + f6i->fib6_metrics->metrics[metric - 1] = val; +} + /* * Routing Table * @@ -608,7 +666,7 @@ static struct fib6_node *fib6_add_1(struct net *net, fn = root; do { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); key = (struct rt6key *)((u8 *)leaf + offset); @@ -637,11 +695,11 @@ static struct fib6_node *fib6_add_1(struct net *net, /* clean up an intermediate node */ if (!(fn->fn_flags & RTN_RTINFO)) { RCU_INIT_POINTER(fn->leaf, NULL); - rt6_release(leaf); + fib6_info_release(leaf); /* remove null_entry in the root node */ } else if (fn->fn_flags & RTN_TL_ROOT && rcu_access_pointer(fn->leaf) == - net->ipv6.ip6_null_entry) { + net->ipv6.fib6_null_entry) { RCU_INIT_POINTER(fn->leaf, NULL); } @@ -750,7 +808,7 @@ insert_above: RCU_INIT_POINTER(in->parent, pn); in->leaf = fn->leaf; atomic_inc(&rcu_dereference_protected(in->leaf, - lockdep_is_held(&table->tb6_lock))->rt6i_ref); + lockdep_is_held(&table->tb6_lock))->fib6_ref); /* update parent pointer */ if (dir) @@ -802,44 +860,37 @@ insert_above: return ln; } -static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) -{ - int i; - - for (i = 0; i < RTAX_MAX; i++) { - if (test_bit(i, mxc->mx_valid)) - mp[i] = mxc->mx[i]; - } -} - -static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) +static void fib6_drop_pcpu_from(struct fib6_info *f6i, + const struct fib6_table *table) { - if (!mxc->mx) - return 0; - - if (dst->flags & DST_HOST) { - u32 
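[Note on fib6_metric_set() above, which replaces the old fib6_copy_metrics()/fib6_commit_metrics() pair with copy-on-write: every fib6_info starts out sharing the read-only dst_default_metrics block and only gets a private, refcounted copy the first time a metric is actually written. The shape of it as a standalone sketch; the array size and the 1-based RTAX numbering are stand-ins for the uapi values:]

#include <stdlib.h>

#define RTAX_MAX_MODEL 16              /* stand-in for RTAX_MAX */

struct metrics { int refcnt; unsigned int m[RTAX_MAX_MODEL]; };
static struct metrics default_metrics; /* shared, never written, never freed */

struct route { struct metrics *met; };

static void metric_set(struct route *rt, int metric, unsigned int val)
{
        if (rt->met == &default_metrics) {
                struct metrics *p = calloc(1, sizeof(*p));

                if (!p)
                        return;        /* best effort, like the GFP_ATOMIC alloc */
                p->refcnt = 1;         /* dropped later in fib6_info_destroy() */
                rt->met = p;
        }
        rt->met->m[metric - 1] = val;  /* RTAX_* constants are 1-based */
}

int main(void)
{
        struct route r = { &default_metrics };

        metric_set(&r, 2, 1500);       /* first write triggers the copy */
        return r.met == &default_metrics;   /* 0 on success */
}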
*mp = dst_metrics_write_ptr(dst); + int cpu; - if (unlikely(!mp)) - return -ENOMEM; + /* release the reference to this fib entry from + * all of its cached pcpu routes + */ + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; - fib6_copy_metrics(mp, mxc); - } else { - dst_init_metrics(dst, mxc->mx, false); + ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + struct fib6_info *from; - /* We've stolen mx now. */ - mxc->mx = NULL; + from = rcu_dereference_protected(pcpu_rt->from, + lockdep_is_held(&table->tb6_lock)); + rcu_assign_pointer(pcpu_rt->from, NULL); + fib6_info_release(from); + } } - - return 0; } -static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, +static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, struct net *net) { - struct fib6_table *table = rt->rt6i_table; + struct fib6_table *table = rt->fib6_table; - if (atomic_read(&rt->rt6i_ref) != 1) { + if (atomic_read(&rt->fib6_ref) != 1) { /* This route is used as dummy address holder in some split * nodes. It is not leaked, but it still holds other resources, * which must be released in time. So, scan ascendant nodes @@ -847,18 +898,22 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * to still alive ones. */ while (fn) { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *new_leaf; + struct fib6_info *new_leaf; if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { new_leaf = fib6_find_prefix(net, table, fn); - atomic_inc(&new_leaf->rt6i_ref); + atomic_inc(&new_leaf->fib6_ref); + rcu_assign_pointer(fn->leaf, new_leaf); - rt6_release(rt); + fib6_info_release(rt); } fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&table->tb6_lock)); } + + if (rt->rt6i_pcpu) + fib6_drop_pcpu_from(rt, table); } } @@ -866,15 +921,15 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * Insert routing information in a node. 
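[Note on the fib6_drop_pcpu_from() call added to fib6_purge_rt() above: per-CPU rt6_info clones now pin the fib entry through their 'from' back-pointer, so unlinking the entry from the tree must visit every CPU slot, clear that pointer, and drop the reference it represented. In miniature, with a single-threaded stand-in for the per-CPU walk; the kernel does the pointer swap with rcu_assign_pointer() under tb6_lock:]

#include <stddef.h>

#define NR_CPUS_MODEL 4

struct fib_entry { int ref; };
struct cached_rt { struct fib_entry *from; };

static void fib_release(struct fib_entry *f)
{
        if (f)
                f->ref--;              /* fib6_info_release() stand-in */
}

static void drop_pcpu_from(struct cached_rt *pcpu[NR_CPUS_MODEL])
{
        int cpu;

        for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++) {
                struct cached_rt *rt = pcpu[cpu];

                if (rt && rt->from) {
                        struct fib_entry *from = rt->from;

                        rt->from = NULL;   /* rcu_assign_pointer(..., NULL) */
                        fib_release(from);
                }
        }
}

int main(void)
{
        struct fib_entry f = { 2 };
        struct cached_rt a = { &f }, b = { NULL };
        struct cached_rt *pcpu[NR_CPUS_MODEL] = { &a, &b, NULL, NULL };

        drop_pcpu_from(pcpu);
        return f.ref - 1;              /* one cached ref dropped: 0 */
}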
*/ -static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, +static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack) { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); - struct rt6_info *iter = NULL; - struct rt6_info __rcu **ins; - struct rt6_info __rcu **fallback_ins = NULL; + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&rt->fib6_table->tb6_lock)); + struct fib6_info *iter = NULL; + struct fib6_info __rcu **ins; + struct fib6_info __rcu **fallback_ins = NULL; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); int add = (!info->nlh || @@ -891,12 +946,12 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, for (iter = leaf; iter; iter = rcu_dereference_protected(iter->rt6_next, - lockdep_is_held(&rt->rt6i_table->tb6_lock))) { + lockdep_is_held(&rt->fib6_table->tb6_lock))) { /* * Search for duplicates */ - if (iter->rt6i_metric == rt->rt6i_metric) { + if (iter->fib6_metric == rt->fib6_metric) { /* * Same priority level */ @@ -916,15 +971,15 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, } if (rt6_duplicate_nexthop(iter, rt)) { - if (rt->rt6i_nsiblings) - rt->rt6i_nsiblings = 0; - if (!(iter->rt6i_flags & RTF_EXPIRES)) + if (rt->fib6_nsiblings) + rt->fib6_nsiblings = 0; + if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; - if (!(rt->rt6i_flags & RTF_EXPIRES)) - rt6_clean_expires(iter); + if (!(rt->fib6_flags & RTF_EXPIRES)) + fib6_clean_expires(iter); else - rt6_set_expires(iter, rt->dst.expires); - iter->rt6i_pmtu = rt->rt6i_pmtu; + fib6_set_expires(iter, rt->expires); + fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu); return -EEXIST; } /* If we have the same destination and the same metric, @@ -940,10 +995,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, */ if (rt_can_ecmp && rt6_qualify_for_ecmp(iter)) - rt->rt6i_nsiblings++; + rt->fib6_nsiblings++; } - if (iter->rt6i_metric > rt->rt6i_metric) + if (iter->fib6_metric > rt->fib6_metric) break; next_iter: @@ -954,7 +1009,7 @@ next_iter: /* No ECMP-able route found, replace first non-ECMP one */ ins = fallback_ins; iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); found++; } @@ -963,34 +1018,34 @@ next_iter: fn->rr_ptr = NULL; /* Link this route to others same route. */ - if (rt->rt6i_nsiblings) { - unsigned int rt6i_nsiblings; - struct rt6_info *sibling, *temp_sibling; + if (rt->fib6_nsiblings) { + unsigned int fib6_nsiblings; + struct fib6_info *sibling, *temp_sibling; /* Find the first route that have the same metric */ sibling = leaf; while (sibling) { - if (sibling->rt6i_metric == rt->rt6i_metric && + if (sibling->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(sibling)) { - list_add_tail(&rt->rt6i_siblings, - &sibling->rt6i_siblings); + list_add_tail(&rt->fib6_siblings, + &sibling->fib6_siblings); break; } sibling = rcu_dereference_protected(sibling->rt6_next, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); } /* For each sibling in the list, increment the counter of * siblings. BUG() if counters does not match, list of siblings * is broken! 
*/ - rt6i_nsiblings = 0; + fib6_nsiblings = 0; list_for_each_entry_safe(sibling, temp_sibling, - &rt->rt6i_siblings, rt6i_siblings) { - sibling->rt6i_nsiblings++; - BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); - rt6i_nsiblings++; + &rt->fib6_siblings, fib6_siblings) { + sibling->fib6_nsiblings++; + BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings); + fib6_nsiblings++; } - BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); + BUG_ON(fib6_nsiblings != rt->fib6_nsiblings); rt6_multipath_rebalance(temp_sibling); } @@ -1003,9 +1058,6 @@ next_iter: add: nlflags |= NLM_F_CREATE; - err = fib6_commit_metrics(&rt->dst, mxc); - if (err) - return err; err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, @@ -1014,8 +1066,8 @@ add: return err; rcu_assign_pointer(rt->rt6_next, iter); - atomic_inc(&rt->rt6i_ref); - rcu_assign_pointer(rt->rt6i_node, fn); + atomic_inc(&rt->fib6_ref); + rcu_assign_pointer(rt->fib6_node, fn); rcu_assign_pointer(*ins, rt); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); @@ -1036,18 +1088,14 @@ add: return -ENOENT; } - err = fib6_commit_metrics(&rt->dst, mxc); - if (err) - return err; - err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, rt, extack); if (err) return err; - atomic_inc(&rt->rt6i_ref); - rcu_assign_pointer(rt->rt6i_node, fn); + atomic_inc(&rt->fib6_ref); + rcu_assign_pointer(rt->fib6_node, fn); rt->rt6_next = iter->rt6_next; rcu_assign_pointer(*ins, rt); if (!info->skip_notify) @@ -1056,35 +1104,35 @@ add: info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } - nsiblings = iter->rt6i_nsiblings; - iter->rt6i_node = NULL; + nsiblings = iter->fib6_nsiblings; + iter->fib6_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; - rt6_release(iter); + fib6_info_release(iter); if (nsiblings) { /* Replacing an ECMP route, remove all siblings */ ins = &rt->rt6_next; iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); while (iter) { - if (iter->rt6i_metric > rt->rt6i_metric) + if (iter->fib6_metric > rt->fib6_metric) break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->rt6_next; - iter->rt6i_node = NULL; + iter->fib6_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; - rt6_release(iter); + fib6_info_release(iter); nsiblings--; info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } else { ins = &iter->rt6_next; } iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); } WARN_ON(nsiblings != 0); } @@ -1093,10 +1141,10 @@ add: return 0; } -static void fib6_start_gc(struct net *net, struct rt6_info *rt) +static void fib6_start_gc(struct net *net, struct fib6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && - (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) + (rt->fib6_flags & RTF_EXPIRES)) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } @@ -1108,22 +1156,22 @@ void fib6_force_start_gc(struct net *net) jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } -static void __fib6_update_sernum_upto_root(struct rt6_info *rt, +static void __fib6_update_sernum_upto_root(struct fib6_info *rt, int sernum) { - struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, + 
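[The sibling bookkeeping renamed above (rt6i_* to fib6_*) is easiest to see in isolation: ECMP-eligible routes of equal metric sit on a circular list, the newcomer arrives knowing how many members it found during the scan, and the post-link walk bumps every old member's count while cross-checking against the newcomer's (the BUG_ON). A self-contained model with assert() standing in for BUG_ON and hand-rolled links standing in for list_head:]

#include <assert.h>

struct rt {
        struct rt *next, *prev;  /* fib6_siblings stand-in, self-linked alone */
        int nsiblings;           /* siblings, excluding this route */
};

static void rt_init(struct rt *r)
{
        r->next = r->prev = r;
        r->nsiblings = 0;
}

/* Append rt behind 'first' (a member of the existing group), then fix
 * every old member's count, mirroring the list_for_each_entry_safe
 * loop in fib6_add_rt2node(). */
static void link_sibling(struct rt *first, struct rt *rt)
{
        int seen = 0;

        rt->prev = first->prev;
        rt->next = first;
        first->prev->next = rt;
        first->prev = rt;

        for (struct rt *s = rt->next; s != rt; s = s->next) {
                s->nsiblings++;
                assert(s->nsiblings == rt->nsiblings);  /* BUG_ON in the kernel */
                seen++;
        }
        assert(seen == rt->nsiblings);
}

int main(void)
{
        struct rt a, b, c;

        rt_init(&a);
        rt_init(&b);
        b.nsiblings = 1;         /* scan found one same-metric route */
        link_sibling(&a, &b);
        rt_init(&c);
        c.nsiblings = 2;         /* scan found two */
        link_sibling(&a, &c);
        return 0;
}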
lockdep_is_held(&rt->fib6_table->tb6_lock)); /* paired with smp_rmb() in rt6_get_cookie_safe() */ smp_wmb(); while (fn) { fn->fn_sernum = sernum; fn = rcu_dereference_protected(fn->parent, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); } } -void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) +void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) { __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); } @@ -1135,22 +1183,16 @@ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) * Need to own table->tb6_lock */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, - struct netlink_ext_ack *extack) +int fib6_add(struct fib6_node *root, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack) { - struct fib6_table *table = rt->rt6i_table; + struct fib6_table *table = rt->fib6_table; struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; int allow_create = 1; int replace_required = 0; int sernum = fib6_new_sernum(info->nl_net); - if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) - return -EINVAL; - if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE)) - return -EINVAL; - if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) allow_create = 0; @@ -1161,8 +1203,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); fn = fib6_add_1(info->nl_net, table, root, - &rt->rt6i_dst.addr, rt->rt6i_dst.plen, - offsetof(struct rt6_info, rt6i_dst), allow_create, + &rt->fib6_dst.addr, rt->fib6_dst.plen, + offsetof(struct fib6_info, fib6_dst), allow_create, replace_required, extack); if (IS_ERR(fn)) { err = PTR_ERR(fn); @@ -1173,7 +1215,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, pn = fn; #ifdef CONFIG_IPV6_SUBTREES - if (rt->rt6i_src.plen) { + if (rt->fib6_src.plen) { struct fib6_node *sn; if (!rcu_access_pointer(fn->subtree)) { @@ -1194,16 +1236,16 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (!sfn) goto failure; - atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); + atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref); rcu_assign_pointer(sfn->leaf, - info->nl_net->ipv6.ip6_null_entry); + info->nl_net->ipv6.fib6_null_entry); sfn->fn_flags = RTN_ROOT; /* Now add the first leaf node to new subtree */ sn = fib6_add_1(info->nl_net, table, sfn, - &rt->rt6i_src.addr, rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src), + &rt->fib6_src.addr, rt->fib6_src.plen, + offsetof(struct fib6_info, fib6_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { @@ -1221,8 +1263,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, rcu_assign_pointer(fn->subtree, sfn); } else { sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn), - &rt->rt6i_src.addr, rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src), + &rt->fib6_src.addr, rt->fib6_src.plen, + offsetof(struct fib6_info, fib6_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { @@ -1235,9 +1277,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (fn->fn_flags & RTN_TL_ROOT) { /* put back null_entry for root node */ rcu_assign_pointer(fn->leaf, - info->nl_net->ipv6.ip6_null_entry); + info->nl_net->ipv6.fib6_null_entry); } else { - atomic_inc(&rt->rt6i_ref); + atomic_inc(&rt->fib6_ref); rcu_assign_pointer(fn->leaf, rt); } } @@ -1245,7 +1287,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } #endif - err = 
fib6_add_rt2node(fn, rt, info, mxc, extack); + err = fib6_add_rt2node(fn, rt, info, extack); if (!err) { __fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); @@ -1259,13 +1301,13 @@ out: * super-tree leaf node we have to find a new one for it. */ if (pn != fn) { - struct rt6_info *pn_leaf = + struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); if (pn_leaf == rt) { pn_leaf = NULL; RCU_INIT_POINTER(pn->leaf, NULL); - atomic_dec(&rt->rt6i_ref); + fib6_info_release(rt); } if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, @@ -1274,10 +1316,10 @@ out: if (!pn_leaf) { WARN_ON(!pn_leaf); pn_leaf = - info->nl_net->ipv6.ip6_null_entry; + info->nl_net->ipv6.fib6_null_entry; } #endif - atomic_inc(&pn_leaf->rt6i_ref); + fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } } @@ -1299,10 +1341,6 @@ failure: (fn->fn_flags & RTN_TL_ROOT && !rcu_access_pointer(fn->leaf)))) fib6_repair_tree(info->nl_net, table, fn); - /* Always release dst as dst->__refcnt is guaranteed - * to be taken before entering this function - */ - dst_release_immediate(&rt->dst); return err; } @@ -1312,7 +1350,7 @@ failure: */ struct lookup_args { - int offset; /* key offset on rt6_info */ + int offset; /* key offset on fib6_info */ const struct in6_addr *addr; /* search key */ }; @@ -1350,7 +1388,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, struct fib6_node *subtree = FIB6_SUBTREE(fn); if (subtree || fn->fn_flags & RTN_RTINFO) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; if (!leaf) @@ -1390,12 +1428,12 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad struct fib6_node *fn; struct lookup_args args[] = { { - .offset = offsetof(struct rt6_info, rt6i_dst), + .offset = offsetof(struct fib6_info, fib6_dst), .addr = daddr, }, #ifdef CONFIG_IPV6_SUBTREES { - .offset = offsetof(struct rt6_info, rt6i_src), + .offset = offsetof(struct fib6_info, fib6_src), .addr = saddr, }, #endif @@ -1431,7 +1469,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, struct fib6_node *fn, *prev = NULL; for (fn = root; fn ; ) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; /* This node is being deleted */ @@ -1480,7 +1518,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root, struct fib6_node *fn; fn = fib6_locate_1(root, daddr, dst_len, - offsetof(struct rt6_info, rt6i_dst), + offsetof(struct fib6_info, fib6_dst), exact_match); #ifdef CONFIG_IPV6_SUBTREES @@ -1491,7 +1529,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root, if (subtree) { fn = fib6_locate_1(subtree, saddr, src_len, - offsetof(struct rt6_info, rt6i_src), + offsetof(struct fib6_info, fib6_src), exact_match); } } @@ -1510,14 +1548,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root, * */ -static struct rt6_info *fib6_find_prefix(struct net *net, +static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn) { struct fib6_node *child_left, *child_right; if (fn->fn_flags & RTN_ROOT) - return net->ipv6.ip6_null_entry; + return net->ipv6.fib6_null_entry; while (fn) { child_left = rcu_dereference_protected(fn->left, @@ -1554,7 +1592,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, /* Set fn->leaf to null_entry for root node. 
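[Note on the fib6_lookup() hunk above: one tree walker serves both keys by carrying the key's byte offset within struct fib6_info (fib6_dst always, fib6_src under CONFIG_IPV6_SUBTREES). The trick in isolation, runnable as-is with stand-in types:]

#include <stddef.h>
#include <stdio.h>

struct rt6key { unsigned char addr[16]; int plen; };
struct entry  { int flags; struct rt6key dst; struct rt6key src; };

struct lookup_args { size_t offset; };

static const struct rt6key *entry_key(const struct entry *e, size_t off)
{
        return (const struct rt6key *)((const char *)e + off);
}

int main(void)
{
        struct entry e = { 0 };
        struct lookup_args args[] = {
                { offsetof(struct entry, dst) },
                { offsetof(struct entry, src) },   /* subtrees only */
        };

        printf("dst plen=%d src plen=%d\n",
               entry_key(&e, args[0].offset)->plen,
               entry_key(&e, args[1].offset)->plen);
        return 0;
}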
*/ if (fn->fn_flags & RTN_TL_ROOT) { - rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry); + rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry); return fn; } @@ -1569,11 +1607,11 @@ static struct fib6_node *fib6_repair_tree(struct net *net, lockdep_is_held(&table->tb6_lock)); struct fib6_node *pn_l = rcu_dereference_protected(pn->left, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, + struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *new_fn_leaf; + struct fib6_info *new_fn_leaf; RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; @@ -1599,10 +1637,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, #if RT6_DEBUG >= 2 if (!new_fn_leaf) { WARN_ON(!new_fn_leaf); - new_fn_leaf = net->ipv6.ip6_null_entry; + new_fn_leaf = net->ipv6.fib6_null_entry; } #endif - atomic_inc(&new_fn_leaf->rt6i_ref); + fib6_info_hold(new_fn_leaf); rcu_assign_pointer(fn->leaf, new_fn_leaf); return pn; } @@ -1658,26 +1696,24 @@ static struct fib6_node *fib6_repair_tree(struct net *net, return pn; RCU_INIT_POINTER(pn->leaf, NULL); - rt6_release(pn_leaf); + fib6_info_release(pn_leaf); fn = pn; } } static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, - struct rt6_info __rcu **rtp, struct nl_info *info) + struct fib6_info __rcu **rtp, struct nl_info *info) { struct fib6_walker *w; - struct rt6_info *rt = rcu_dereference_protected(*rtp, + struct fib6_info *rt = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); struct net *net = info->nl_net; RT6_TRACE("fib6_del_route\n"); - WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); - /* Unlink it */ *rtp = rt->rt6_next; - rt->rt6i_node = NULL; + rt->fib6_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; @@ -1689,14 +1725,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, fn->rr_ptr = NULL; /* Remove this entry from other siblings */ - if (rt->rt6i_nsiblings) { - struct rt6_info *sibling, *next_sibling; + if (rt->fib6_nsiblings) { + struct fib6_info *sibling, *next_sibling; list_for_each_entry_safe(sibling, next_sibling, - &rt->rt6i_siblings, rt6i_siblings) - sibling->rt6i_nsiblings--; - rt->rt6i_nsiblings = 0; - list_del_init(&rt->rt6i_siblings); + &rt->fib6_siblings, fib6_siblings) + sibling->fib6_nsiblings--; + rt->fib6_nsiblings = 0; + list_del_init(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } @@ -1730,40 +1766,30 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); - rt6_release(rt); + fib6_info_release(rt); } /* Need to own table->tb6_lock */ -int fib6_del(struct rt6_info *rt, struct nl_info *info) +int fib6_del(struct fib6_info *rt, struct nl_info *info) { - struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); - struct fib6_table *table = rt->rt6i_table; + struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&rt->fib6_table->tb6_lock)); + struct fib6_table *table = rt->fib6_table; struct net *net = info->nl_net; - struct rt6_info __rcu **rtp; - struct rt6_info __rcu **rtp_next; + struct 
fib6_info __rcu **rtp; + struct fib6_info __rcu **rtp_next; -#if RT6_DEBUG >= 2 - if (rt->dst.obsolete > 0) { - WARN_ON(fn); - return -ENOENT; - } -#endif - if (!fn || rt == net->ipv6.ip6_null_entry) + if (!fn || rt == net->ipv6.fib6_null_entry) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - /* remove cached dst from exception table */ - if (rt->rt6i_flags & RTF_CACHE) - return rt6_remove_exception_rt(rt); - /* * Walk the leaf entries looking for ourself */ for (rtp = &fn->leaf; *rtp; rtp = rtp_next) { - struct rt6_info *cur = rcu_dereference_protected(*rtp, + struct fib6_info *cur = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); if (rt == cur) { fib6_del_route(table, fn, rtp, info); @@ -1907,7 +1933,7 @@ static int fib6_walk(struct net *net, struct fib6_walker *w) static int fib6_clean_node(struct fib6_walker *w) { int res; - struct rt6_info *rt; + struct fib6_info *rt; struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w); struct nl_info info = { .nl_net = c->net, @@ -1932,17 +1958,17 @@ static int fib6_clean_node(struct fib6_walker *w) #if RT6_DEBUG >= 2 pr_debug("%s: del failed: rt=%p@%p err=%d\n", __func__, rt, - rcu_access_pointer(rt->rt6i_node), + rcu_access_pointer(rt->fib6_node), res); #endif continue; } return 0; } else if (res == -2) { - if (WARN_ON(!rt->rt6i_nsiblings)) + if (WARN_ON(!rt->fib6_nsiblings)) continue; - rt = list_last_entry(&rt->rt6i_siblings, - struct rt6_info, rt6i_siblings); + rt = list_last_entry(&rt->fib6_siblings, + struct fib6_info, fib6_siblings); continue; } WARN_ON(res != 0); @@ -1961,7 +1987,7 @@ static int fib6_clean_node(struct fib6_walker *w) */ static void fib6_clean_tree(struct net *net, struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), + int (*func)(struct fib6_info *, void *arg), int sernum, void *arg) { struct fib6_cleaner c; @@ -1979,7 +2005,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, } static void __fib6_clean_all(struct net *net, - int (*func)(struct rt6_info *, void *), + int (*func)(struct fib6_info *, void *), int sernum, void *arg) { struct fib6_table *table; @@ -1999,7 +2025,7 @@ static void __fib6_clean_all(struct net *net, rcu_read_unlock(); } -void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *), +void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), void *arg) { __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); @@ -2016,7 +2042,7 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct rt6_info *rt, void *arg) +static int fib6_age(struct fib6_info *rt, void *arg) { struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; @@ -2026,8 +2052,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) * Routes are expired even if they are in use. 
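[With RTF_CACHE clones gone from the tree, fib6_del() above loses its exception-table branch, and fib6_age() (continuing below) reads the expiry straight off the fib entry instead of rt->dst.expires. The expiry test extracted into a standalone sketch; time_after() is written out the way the kernel defines it so jiffies wraparound is handled, and the RTF_EXPIRES value is quoted from uapi as a stand-in:]

#include <stdbool.h>

#define RTF_EXPIRES      0x00400000
#define time_after(a, b) ((long)((b) - (a)) < 0)

static bool route_expired(unsigned int flags, unsigned long expires,
                          unsigned long now)
{
        return (flags & RTF_EXPIRES) && expires && time_after(now, expires);
}

int main(void)
{
        return route_expired(RTF_EXPIRES, 100, 200) ? 0 : 1;
}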
*/ - if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { - if (time_after(now, rt->dst.expires)) { + if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { + if (time_after(now, rt->expires)) { RT6_TRACE("expiring %p\n", rt); return -1; } @@ -2110,7 +2136,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); @@ -2122,7 +2148,7 @@ static int __net_init fib6_net_init(struct net *net) goto out_fib6_main_tbl; net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); @@ -2220,25 +2246,26 @@ struct ipv6_route_iter { static int ipv6_route_seq_show(struct seq_file *seq, void *v) { - struct rt6_info *rt = v; + struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; + const struct net_device *dev; - seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); + seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen); #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif - if (rt->rt6i_flags & RTF_GATEWAY) - seq_printf(seq, "%pi6", &rt->rt6i_gateway); + if (rt->fib6_flags & RTF_GATEWAY) + seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw); else seq_puts(seq, "00000000000000000000000000000000"); + dev = rt->fib6_nh.nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), - rt->dst.__use, rt->rt6i_flags, - rt->dst.dev ? rt->dst.dev->name : ""); + rt->fib6_metric, atomic_read(&rt->fib6_ref), 0, + rt->fib6_flags, dev ? 
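[One user-visible consequence of the ipv6_route_seq_show() hunk above: /proc/net/ipv6_route keeps its ten-column layout, but the reference count now comes from fib6_ref and the per-route use counter is hardwired to 0, since packets no longer travel through the fib entry's own dst. A small reader for the (unchanged) format:]

#include <stdio.h>

int main(void)
{
        char dst[33], src[33], gw[33], dev[17];
        unsigned plen, splen, metric, refcnt, use, flags;
        FILE *f = fopen("/proc/net/ipv6_route", "r");

        if (!f)
                return 1;
        while (fscanf(f, "%32s %x %32s %x %32s %x %x %x %x %16s",
                      dst, &plen, src, &splen, gw,
                      &metric, &refcnt, &use, &flags, dev) == 10)
                printf("%s/%u dev %s flags %08x (use column now always 0)\n",
                       dst, plen, dev, flags);
        fclose(f);
        return 0;
}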
dev->name : ""); iter->w.leaf = NULL; return 0; } @@ -2311,14 +2338,14 @@ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int r; - struct rt6_info *n; + struct fib6_info *n; struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; if (!v) goto iter_table; - n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next); + n = rcu_dereference_bh(((struct fib6_info *)v)->rt6_next); if (n) { ++*pos; return n; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 69727bc168cb..04c69e0c84b3 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -807,7 +807,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) } /** - * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own + * ip6gre_tnl_addr_conflict - compare packet addresses to tunnel's own * @t: the outgoing tunnel device * @hdr: IPv6 header from the incoming packet * @@ -896,6 +896,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, struct flowi6 fl6; int err = -EINVAL; __u32 mtu; + int nhoff; if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; @@ -908,6 +909,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, truncate = true; } + nhoff = skb_network_header(skb) - skb_mac_header(skb); + if (skb->protocol == htons(ETH_P_IP) && + (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) + truncate = true; + if (skb_cow_head(skb, dev->needed_headroom)) goto tx_err; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 9ee208a348f5..f08d34491ece 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -336,7 +336,7 @@ int ip6_mc_input(struct sk_buff *skb) bool deliver; __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev), - ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST, + __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST, skb->len); hdr = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 4a87f9428ca5..5b3f2f89ef41 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -88,9 +88,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6)) - udpfrag = proto == IPPROTO_UDP && encap; + udpfrag = proto == IPPROTO_UDP && encap && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP); else - udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + udpfrag = proto == IPPROTO_UDP && !skb->encapsulation && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP); ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2e891d2c30ef..dfd8af41824e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -425,6 +425,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) int ip6_forward(struct sk_buff *skb) { + struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); @@ -444,8 +445,7 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - __IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -476,8 +476,7 @@ int ip6_forward(struct sk_buff *skb) /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, 
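[Note on the ip6erspan_tunnel_xmit() hunk above: it adds a second reason to set the ERSPAN 'truncated' bit, namely an inner IPv4 packet whose tot_len claims more bytes than the frame actually carries past the MAC header (typical of partial captures fed into the tunnel). The test in isolation:]

#include <arpa/inet.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* nhoff = skb_network_header(skb) - skb_mac_header(skb) */
static bool erspan_needs_truncate(uint16_t tot_len_be,
                                  size_t skb_len, size_t nhoff)
{
        return ntohs(tot_len_be) > skb_len - nhoff;
}

int main(void)
{
        /* claims 1500 bytes, frame only has 1000 - 14 past the MAC hdr */
        return erspan_needs_truncate(htons(1500), 1000, 14) ? 0 : 1;
}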
ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - __IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -490,15 +489,13 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - __IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - __IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb_dst(skb); @@ -554,8 +551,7 @@ int ip6_forward(struct sk_buff *skb) /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - __IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INTOOBIGERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); @@ -579,7 +575,7 @@ int ip6_forward(struct sk_buff *skb) ip6_forward_finish); error: - __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); drop: kfree_skb(skb); return -EINVAL; @@ -966,15 +962,21 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * that's why we try it again later. */ if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) { + struct fib6_info *from; struct rt6_info *rt; bool had_dst = *dst != NULL; if (!had_dst) *dst = ip6_route_output(net, sk, fl6); rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; - err = ip6_route_get_saddr(net, rt, &fl6->daddr, + + rcu_read_lock(); + from = rt ? rcu_dereference(rt->from) : NULL; + err = ip6_route_get_saddr(net, from, &fl6->daddr, sk ? inet6_sk(sk)->srcprefs : 0, &fl6->saddr); + rcu_read_unlock(); + if (err) goto out_err_release; @@ -1238,6 +1240,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (mtu < IPV6_MIN_MTU) return -EINVAL; cork->base.fragsize = mtu; + cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0; + if (dst_allfrag(xfrm_dst_path(&rt->dst))) cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0; @@ -1272,6 +1276,7 @@ static int __ip6_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; unsigned int wmem_alloc_delta = 0; + bool paged; skb = skb_peek_tail(queue); if (!skb) { @@ -1279,7 +1284,8 @@ static int __ip6_append_data(struct sock *sk, dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } - mtu = cork->fragsize; + paged = !!cork->gso_size; + mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1327,7 +1333,7 @@ emsgsize: if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && headersize == sizeof(struct ipv6hdr) && length <= mtu - headersize && - !(flags & MSG_MORE) && + (!(flags & MSG_MORE) || cork->gso_size) && rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; @@ -1370,6 +1376,7 @@ emsgsize: unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; + unsigned int pagedlen = 0; alloc_new_skb: /* There's no room in the current skb */ if (skb) @@ -1392,11 +1399,17 @@ alloc_new_skb: if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? 
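[Note on cork->base.gso_size above, the IPv6 half of the UDP GSO plumbing in this series: ip6_setup_cork() records the segment size for SOCK_DGRAM sockets, and __ip6_append_data() then builds one large paged skb against IP6_MAX_MTU instead of many MTU-sized ones. From userspace this is driven by the UDP_SEGMENT socket option; a hedged sketch, with the option value guard-defined in case older headers lack it:]

#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103              /* linux/udp.h on UDP GSO kernels */
#endif

int main(void)
{
        int fd = socket(AF_INET6, SOCK_DGRAM, 0);
        int gso = 1200;              /* payload bytes per wire datagram */
        char buf[6000];
        struct sockaddr_in6 dst;

        if (fd < 0)
                return 1;
        memset(buf, 0, sizeof(buf));
        memset(&dst, 0, sizeof(dst));
        dst.sin6_family = AF_INET6;
        dst.sin6_port = htons(9);    /* discard */
        dst.sin6_addr = in6addr_loopback;

        if (setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT, &gso, sizeof(gso)) < 0)
                perror("UDP_SEGMENT");
        else if (sendto(fd, buf, sizeof(buf), 0,
                        (struct sockaddr *)&dst, sizeof(dst)) < 0)
                perror("sendto");    /* kernel splits into five 1200B datagrams */
        close(fd);
        return 0;
}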
mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; + fraglen = datalen + fragheaderlen; + if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else - alloclen = datalen + fragheaderlen; + else if (!paged) + alloclen = fraglen; + else { + alloclen = min_t(int, fraglen, MAX_HEADER); + pagedlen = fraglen - alloclen; + } alloclen += dst_exthdrlen; @@ -1418,7 +1431,7 @@ alloc_new_skb: */ alloclen += sizeof(struct frag_hdr); - copy = datalen - transhdrlen - fraggap; + copy = datalen - transhdrlen - fraggap - pagedlen; if (copy < 0) { err = -EINVAL; goto error; @@ -1457,7 +1470,7 @@ alloc_new_skb: /* * Find where to start putting bytes */ - data = skb_put(skb, fraglen); + data = skb_put(skb, fraglen - pagedlen); skb_set_network_header(skb, exthdrlen); data += fragheaderlen; skb->transport_header = (skb->network_header + @@ -1480,7 +1493,7 @@ alloc_new_skb: } offset += copy; - length -= datalen - fraggap; + length -= copy + transhdrlen; transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; @@ -1753,9 +1766,9 @@ struct sk_buff *ip6_make_skb(struct sock *sk, void *from, int length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, + struct inet_cork_full *cork, const struct sockcm_cookie *sockc) { - struct inet_cork_full cork; struct inet6_cork v6_cork; struct sk_buff_head queue; int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); @@ -1766,27 +1779,27 @@ struct sk_buff *ip6_make_skb(struct sock *sk, __skb_queue_head_init(&queue); - cork.base.flags = 0; - cork.base.addr = 0; - cork.base.opt = NULL; - cork.base.dst = NULL; + cork->base.flags = 0; + cork->base.addr = 0; + cork->base.opt = NULL; + cork->base.dst = NULL; v6_cork.opt = NULL; - err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); + err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6); if (err) { - ip6_cork_release(&cork, &v6_cork); + ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); } if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_sk(sk)->dontfrag; - err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, + err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, flags, ipc6, sockc); if (err) { - __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); + __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); return ERR_PTR(err); } - return __ip6_make_skb(sk, &queue, &cork, &v6_cork); + return __ip6_make_skb(sk, &queue, cork, &v6_cork); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 298fd8b6ed17..20a419ee8000 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -180,7 +180,8 @@ static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = { }; static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct fib_rule_hdr *frh, struct nlattr **tb) + struct fib_rule_hdr *frh, struct nlattr **tb, + struct netlink_ext_ack *extack) { return 0; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9de4dfb126ba..9ac5366064e3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1155,7 +1155,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); struct neighbour *neigh = NULL; struct inet6_dev *in6_dev; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; + struct net *net; int lifetime; struct ndisc_options ndopts; int optlen; @@ -1253,9 +1254,9 @@ static void ndisc_router_discovery(struct sk_buff 
*skb) /* Do not accept RA with source-addr found on local machine unless * accept_ra_from_local is set to true. */ + net = dev_net(in6_dev->dev); if (!in6_dev->cnf.accept_ra_from_local && - ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - in6_dev->dev, 0)) { + ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", skb->dev->name); @@ -1272,20 +1273,22 @@ static void ndisc_router_discovery(struct sk_buff *skb) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif - rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); + rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { - neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); + neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, + rt->fib6_nh.nh_dev, NULL, + &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - ip6_rt_put(rt); + fib6_info_release(rt); return; } } if (rt && lifetime == 0) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } @@ -1294,7 +1297,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); - rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); + rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, + skb->dev, pref); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", @@ -1302,28 +1306,29 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); + neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, + rt->fib6_nh.nh_dev, NULL, + &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - ip6_rt_put(rt); + fib6_info_release(rt); return; } neigh->flags |= NTF_ROUTER; } else if (rt) { - rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); + rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); } if (rt) - rt6_set_expires(rt, jiffies + (HZ * lifetime)); + fib6_set_expires(rt, jiffies + (HZ * lifetime)); if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; - if (rt) - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, - ra_msg->icmph.icmp6_hop_limit); + fib6_metric_set(rt, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } else { ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } @@ -1475,10 +1480,7 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (in6_dev->cnf.mtu6 != mtu) { in6_dev->cnf.mtu6 = mtu; - - if (rt) - dst_metric_set(&rt->dst, RTAX_MTU, mtu); - + fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } } @@ -1497,7 +1499,7 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: - ip6_rt_put(rt); + fib6_info_release(rt); if (neigh) neigh_release(neigh); } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4979610287e2..b939b94e7e91 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -163,7 +163,8 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) } static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, - struct frag_hdr *fhdr, int nhoff) + struct frag_hdr *fhdr, int nhoff, + u32 *prob_offset) { struct sk_buff *prev, *next; struct net_device *dev; @@ -179,11 
+180,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - ((u8 *)&fhdr->frag_off - - skb_network_header(skb))); + *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); return -1; } @@ -214,10 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - offsetof(struct ipv6hdr, payload_len)); + *prob_offset = offsetof(struct ipv6hdr, payload_len); return -1; } if (end > fq->q.len) { @@ -519,15 +513,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb) iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { + u32 prob_offset = 0; int ret; spin_lock(&fq->q.lock); fq->iif = iif; - ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, + &prob_offset); spin_unlock(&fq->q.lock); inet_frag_put(&fq->q); + if (prob_offset) { + __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); + } return ret; } @@ -536,7 +537,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return -1; fail_hdr: - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f4d61736c41a..8ed1b519c2b0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,7 +78,6 @@ enum rt6_nud_state { RT6_NUD_SUCCEED = 1 }; -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); static unsigned int ip6_mtu(const struct dst_entry *dst); @@ -97,25 +96,24 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); -static void rt6_dst_from_metrics_check(struct rt6_info *rt); -static int rt6_score_route(struct rt6_info *rt, int oif, int strict); -static size_t rt6_nlmsg_size(struct rt6_info *rt); -static int rt6_fill_node(struct net *net, - struct sk_buff *skb, struct rt6_info *rt, - struct in6_addr *dst, struct in6_addr *src, +static int rt6_score_route(struct fib6_info *rt, int oif, int strict); +static size_t rt6_nlmsg_size(struct fib6_info *rt); +static int rt6_fill_node(struct net *net, struct sk_buff *skb, + struct fib6_info *rt, struct dst_entry *dst, + struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags); -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); -static 
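/*
 * [illustrative sketch, not part of the patch] ip6_frag_queue() above now
 * records the offending header offset in *prob_offset instead of sending the
 * ICMPv6 Parameter Problem itself, so ipv6_frag_rcv() can emit the error
 * after fq->q.lock has been dropped. The shape of that deferral, reduced to
 * toy types:
 */
#include <stdio.h>

struct toy_fq { int len; };

/* Validation failure is reported through *prob_offset; no side effects. */
static int toy_frag_queue(struct toy_fq *fq, int end, unsigned int *prob_offset)
{
	if (end > 65535) {
		*prob_offset = 4;	/* e.g. the offset of payload_len */
		return -1;
	}
	fq->len = end;
	return 0;
}

int main(void)
{
	struct toy_fq fq = { 0 };
	unsigned int prob_offset = 0;
	int ret;

	/* spin_lock(&fq->q.lock) would be held here ... */
	ret = toy_frag_queue(&fq, 70000, &prob_offset);
	/* ... and released here, before any reporting happens */
	if (prob_offset)
		printf("icmpv6_param_prob at offset %u\n", prob_offset);
	return ret < 0 ? 1 : 0;
}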
struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev); @@ -184,29 +182,10 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) } } -static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) -{ - return dst_metrics_write_ptr(&rt->from->dst); -} - -static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) -{ - struct rt6_info *rt = (struct rt6_info *)dst; - - if (rt->rt6i_flags & RTF_PCPU) - return rt6_pcpu_cow_metrics(rt); - else if (rt->rt6i_flags & RTF_CACHE) - return NULL; - else - return dst_cow_metrics_generic(dst, old); -} - -static inline const void *choose_neigh_daddr(struct rt6_info *rt, +static inline const void *choose_neigh_daddr(const struct in6_addr *p, struct sk_buff *skb, const void *daddr) { - struct in6_addr *p = &rt->rt6i_gateway; - if (!ipv6_addr_any(p)) return (const void *) p; else if (skb) @@ -214,18 +193,27 @@ static inline const void *choose_neigh_daddr(struct rt6_info *rt, return daddr; } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr) +struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, + struct net_device *dev, + struct sk_buff *skb, + const void *daddr) { - struct rt6_info *rt = (struct rt6_info *) dst; struct neighbour *n; - daddr = choose_neigh_daddr(rt, skb, daddr); - n = __ipv6_neigh_lookup(dst->dev, daddr); + daddr = choose_neigh_daddr(gw, skb, daddr); + n = __ipv6_neigh_lookup(dev, daddr); if (n) return n; - return neigh_create(&nd_tbl, daddr, dst->dev); + return neigh_create(&nd_tbl, daddr, dev); +} + +static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); + + return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr); } static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) @@ -233,7 +221,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) struct net_device *dev = dst->dev; struct rt6_info *rt = (struct rt6_info *)dst; - daddr = choose_neigh_daddr(rt, NULL, daddr); + daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr); if (!daddr) return; if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -250,7 +238,7 @@ static struct dst_ops ip6_dst_ops_template = { .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .mtu = ip6_mtu, - .cow_metrics = ipv6_cow_metrics, + .cow_metrics = dst_cow_metrics_generic, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -258,7 +246,7 @@ static struct dst_ops ip6_dst_ops_template = { .update_pmtu = ip6_rt_update_pmtu, .redirect = rt6_do_redirect, .local_out = __ip6_local_out, - .neigh_lookup = ip6_neigh_lookup, + .neigh_lookup = ip6_dst_neigh_lookup, .confirm_neigh = ip6_confirm_neigh, }; @@ -288,13 +276,22 @@ static struct dst_ops ip6_dst_blackhole_ops = { .update_pmtu = ip6_rt_blackhole_update_pmtu, .redirect = ip6_rt_blackhole_redirect, .cow_metrics = dst_cow_metrics_generic, - .neigh_lookup = ip6_neigh_lookup, + .neigh_lookup = ip6_dst_neigh_lookup, }; static const u32 ip6_template_metrics[RTAX_MAX] = { [RTAX_HOPLIMIT - 1] = 0, }; +static const struct fib6_info fib6_null_entry_template = { + .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP), + .fib6_protocol = RTPROT_KERNEL, + .fib6_metric = ~(u32)0, + 
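/*
 * [illustrative sketch, not part of the patch] ip6_neigh_lookup() above is
 * now keyed by (gateway, device) rather than by a dst_entry, so callers that
 * hold only a fib6_info (ndisc, earlier in this series) can share it, while
 * the dst_ops hook becomes a thin adapter. The same pattern with toy types:
 */
#include <stdio.h>

struct toy_dev { int ifindex; };
struct toy_dst { struct toy_dev *dev; int gateway; };

/* Generic lookup: needs only the gateway key and the device. */
static int toy_neigh_lookup(int gw, const struct toy_dev *dev)
{
	return gw * 1000 + dev->ifindex;	/* fake neighbour handle */
}

/* dst_ops-style adapter: unwraps the dst and delegates. */
static int toy_dst_neigh_lookup(const struct toy_dst *dst)
{
	return toy_neigh_lookup(dst->gateway, dst->dev);
}

int main(void)
{
	struct toy_dev eth0 = { 2 };
	struct toy_dst dst = { &eth0, 42 };

	printf("%d %d\n", toy_neigh_lookup(42, &eth0),
	       toy_dst_neigh_lookup(&dst));
	return 0;
}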
.fib6_ref = ATOMIC_INIT(1), + .fib6_type = RTN_UNREACHABLE, + .fib6_metrics = (struct dst_metrics *)&dst_default_metrics, +}; + static const struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -305,9 +302,6 @@ static const struct rt6_info ip6_null_entry_template = { .output = ip6_pkt_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -322,9 +316,6 @@ static const struct rt6_info ip6_prohibit_entry_template = { .output = ip6_pkt_prohibit_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; static const struct rt6_info ip6_blk_hole_entry_template = { @@ -337,9 +328,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .output = dst_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; #endif @@ -349,14 +337,12 @@ static void rt6_info_init(struct rt6_info *rt) struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - INIT_LIST_HEAD(&rt->rt6i_siblings); INIT_LIST_HEAD(&rt->rt6i_uncached); } /* allocate dst with ip6_dst_ops */ -static struct rt6_info *__ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, + int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, flags); @@ -368,34 +354,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, return rt; } - -struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) -{ - struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); - - if (rt) { - rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); - if (!rt->rt6i_pcpu) { - dst_release_immediate(&rt->dst); - return NULL; - } - } - - return rt; -} EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; - struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from; struct inet6_dev *idev; dst_destroy_metrics_generic(dst); - free_percpu(rt->rt6i_pcpu); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; @@ -403,14 +370,12 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1); - if (bucket) { - rt->rt6i_exception_bucket = NULL; - kfree(bucket); - } - rt->from = NULL; - dst_release(&from->dst); + rcu_read_lock(); + from = rcu_dereference(rt->from); + rcu_assign_pointer(rt->from, NULL); + fib6_info_release(from); + rcu_read_unlock(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -440,23 +405,27 @@ static bool __rt6_check_expired(const struct rt6_info *rt) static bool rt6_check_expired(const struct rt6_info *rt) { + struct fib6_info *from; + + from = rcu_dereference(rt->from); + if (rt->rt6i_flags & RTF_EXPIRES) { if (time_after(jiffies, rt->dst.expires)) return true; - } else if (rt->from) { + } else if (from) { return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || - rt6_check_expired(rt->from); + fib6_check_expired(from); } return false; } -static struct rt6_info *rt6_multipath_select(const struct net *net, - struct rt6_info *match, +static struct fib6_info *rt6_multipath_select(const 
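/*
 * [illustrative sketch, not part of the patch] ip6_dst_destroy() above now
 * drops its back-pointer to the FIB entry under RCU: read rt->from, clear
 * it, then release the reference. Modeled below with a plain refcount; the
 * RCU primitives are shown only as comments:
 */
#include <stdio.h>

struct toy_fib { int refcnt; };
struct toy_rt { struct toy_fib *from; };

static void toy_fib_release(struct toy_fib *f)
{
	if (f && --f->refcnt == 0)
		printf("fib entry freed\n");
}

static void toy_dst_destroy(struct toy_rt *rt)
{
	struct toy_fib *from;

	/* rcu_read_lock(); */
	from = rt->from;	/* rcu_dereference(rt->from) */
	rt->from = NULL;	/* rcu_assign_pointer(rt->from, NULL) */
	toy_fib_release(from);	/* fib6_info_release(from) */
	/* rcu_read_unlock(); */
}

int main(void)
{
	struct toy_fib f = { 1 };
	struct toy_rt rt = { &f };

	toy_dst_destroy(&rt);
	return 0;
}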
struct net *net, + struct fib6_info *match, struct flowi6 *fl6, int oif, const struct sk_buff *skb, int strict) { - struct rt6_info *sibling, *next_sibling; + struct fib6_info *sibling, *next_sibling; /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. @@ -464,12 +433,15 @@ static struct rt6_info *rt6_multipath_select(const struct net *net, if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); - if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) + if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound)) return match; - list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings, - rt6i_siblings) { - if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound)) + list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, + fib6_siblings) { + int nh_upper_bound; + + nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound); + if (fl6->mp_hash > nh_upper_bound) continue; if (rt6_score_route(sibling, oif, strict) < 0) break; @@ -484,38 +456,27 @@ static struct rt6_info *rt6_multipath_select(const struct net *net, * Route lookup. rcu_read_lock() should be held. */ -static inline struct rt6_info *rt6_device_match(struct net *net, - struct rt6_info *rt, +static inline struct fib6_info *rt6_device_match(struct net *net, + struct fib6_info *rt, const struct in6_addr *saddr, int oif, int flags) { - struct rt6_info *local = NULL; - struct rt6_info *sprt; + struct fib6_info *sprt; - if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD)) + if (!oif && ipv6_addr_any(saddr) && + !(rt->fib6_nh.nh_flags & RTNH_F_DEAD)) return rt; for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) { - struct net_device *dev = sprt->dst.dev; + const struct net_device *dev = sprt->fib6_nh.nh_dev; - if (sprt->rt6i_nh_flags & RTNH_F_DEAD) + if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD) continue; if (oif) { if (dev->ifindex == oif) return sprt; - if (dev->flags & IFF_LOOPBACK) { - if (!sprt->rt6i_idev || - sprt->rt6i_idev->dev->ifindex != oif) { - if (flags & RT6_LOOKUP_F_IFACE) - continue; - if (local && - local->rt6i_idev->dev->ifindex == oif) - continue; - } - local = sprt; - } } else { if (ipv6_chk_addr(net, saddr, dev, flags & RT6_LOOKUP_F_IFACE)) @@ -523,15 +484,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net, } } - if (oif) { - if (local) - return local; - - if (flags & RT6_LOOKUP_F_IFACE) - return net->ipv6.ip6_null_entry; - } + if (oif && flags & RT6_LOOKUP_F_IFACE) + return net->ipv6.fib6_null_entry; - return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt; + return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -553,10 +509,13 @@ static void rt6_probe_deferred(struct work_struct *w) kfree(work); } -static void rt6_probe(struct rt6_info *rt) +static void rt6_probe(struct fib6_info *rt) { struct __rt6_probe_work *work; + const struct in6_addr *nh_gw; struct neighbour *neigh; + struct net_device *dev; + /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -565,20 +524,25 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. 
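/*
 * [illustrative sketch, not part of the patch] rt6_multipath_select() above
 * implements hash-threshold next-hop selection: each sibling carries a
 * precomputed upper bound on the flow hash, and the first sibling whose
 * bound covers the hash wins. A compact standalone version:
 */
#include <stdio.h>

struct toy_nh { int upper_bound; const char *name; };

static const char *toy_select(const struct toy_nh *nh, int n, int hash)
{
	int i;

	for (i = 0; i < n; i++)
		if (hash <= nh[i].upper_bound)
			return nh[i].name;	/* bound covers this flow */
	return nh[n - 1].name;			/* fallback: last sibling */
}

int main(void)
{
	/* two equal-weight nexthops splitting a 0..255 hash space */
	struct toy_nh nh[] = { { 127, "via fe80::1" }, { 255, "via fe80::2" } };

	printf("%s\n", toy_select(nh, 2, 42));
	printf("%s\n", toy_select(nh, 2, 200));
	return 0;
}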
*/ - if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) + if (!rt || !(rt->fib6_flags & RTF_GATEWAY)) return; + + nh_gw = &rt->fib6_nh.nh_gw; + dev = rt->fib6_nh.nh_dev; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { + struct inet6_dev *idev; + if (neigh->nud_state & NUD_VALID) goto out; + idev = __in6_dev_get(dev); work = NULL; write_lock(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, - neigh->updated + - rt->rt6i_idev->cnf.rtr_probe_interval)) { + neigh->updated + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) __neigh_set_probe_once(neigh); @@ -590,9 +554,9 @@ static void rt6_probe(struct rt6_info *rt) if (work) { INIT_WORK(&work->work, rt6_probe_deferred); - work->target = rt->rt6i_gateway; - dev_hold(rt->dst.dev); - work->dev = rt->dst.dev; + work->target = *nh_gw; + dev_hold(dev); + work->dev = dev; schedule_work(&work->work); } @@ -600,7 +564,7 @@ out: rcu_read_unlock_bh(); } #else -static inline void rt6_probe(struct rt6_info *rt) +static inline void rt6_probe(struct fib6_info *rt) { } #endif @@ -608,28 +572,27 @@ static inline void rt6_probe(struct rt6_info *rt) /* * Default Router Selection (RFC 2461 6.3.6) */ -static inline int rt6_check_dev(struct rt6_info *rt, int oif) +static inline int rt6_check_dev(struct fib6_info *rt, int oif) { - struct net_device *dev = rt->dst.dev; + const struct net_device *dev = rt->fib6_nh.nh_dev; + if (!oif || dev->ifindex == oif) return 2; - if ((dev->flags & IFF_LOOPBACK) && - rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) - return 1; return 0; } -static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) { - struct neighbour *neigh; enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; + struct neighbour *neigh; - if (rt->rt6i_flags & RTF_NONEXTHOP || - !(rt->rt6i_flags & RTF_GATEWAY)) + if (rt->fib6_flags & RTF_NONEXTHOP || - !(rt->fib6_flags & RTF_GATEWAY)) + !(rt->fib6_flags & RTF_GATEWAY)) return RT6_NUD_SUCCEED; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev, + &rt->fib6_nh.nh_gw); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) @@ -650,8 +613,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) return ret; } -static int rt6_score_route(struct rt6_info *rt, int oif, - int strict) +static int rt6_score_route(struct fib6_info *rt, int oif, int strict) { int m; @@ -659,7 +621,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, if (!m && (strict & RT6_LOOKUP_F_IFACE)) return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF - m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; + m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2; #endif if (strict & RT6_LOOKUP_F_REACHABLE) { int n = rt6_check_neigh(rt); @@ -669,23 +631,37 @@ static int rt6_score_route(struct rt6_info *rt, int oif, return m; } -static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match, +/* called with rcu_read_lock held */ +static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i) +{ + const struct net_device *dev = fib6_info_nh_dev(f6i); + bool rc = false; + + if (dev) { + const struct inet6_dev *idev = __in6_dev_get(dev); + + rc = !!idev->cnf.ignore_routes_with_linkdown; + } + + return rc; +} + +static struct fib6_info *find_match(struct 
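/*
 * [illustrative sketch, not part of the patch] rt6_score_route() above
 * builds a small integer score: an interface match contributes 2, the RA
 * router preference occupies higher bits, and under strict reachability an
 * invalid neighbour vetoes the route entirely. Simplified:
 */
#include <stdio.h>

static int toy_score(int oif, int route_ifindex, int pref /* 0..3 */,
		     int neigh_valid, int strict_iface, int strict_reachable)
{
	int m = (!oif || route_ifindex == oif) ? 2 : 0;

	if (!m && strict_iface)
		return -1;		/* RT6_NUD_FAIL_HARD analogue */
	m |= pref << 2;			/* decoded RTF_PREF bits */
	if (strict_reachable && !neigh_valid)
		return -2;		/* veto; caller tries the next route */
	return m;
}

int main(void)
{
	printf("%d\n", toy_score(2, 2, 1, 1, 1, 1));	/* 6: match + pref */
	printf("%d\n", toy_score(2, 3, 0, 1, 1, 1));	/* -1: wrong oif */
	return 0;
}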
fib6_info *rt, int oif, int strict, + int *mpri, struct fib6_info *match, bool *do_rr) { int m; bool match_do_rr = false; - struct inet6_dev *idev = rt->rt6i_idev; - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) goto out; - if (idev->cnf.ignore_routes_with_linkdown && - rt->rt6i_nh_flags & RTNH_F_LINKDOWN && + if (fib6_ignore_linkdown(rt) && + rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; - if (rt6_check_expired(rt)) + if (fib6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); @@ -709,19 +685,19 @@ out: return match; } -static struct rt6_info *find_rr_leaf(struct fib6_node *fn, - struct rt6_info *leaf, - struct rt6_info *rr_head, +static struct fib6_info *find_rr_leaf(struct fib6_node *fn, + struct fib6_info *leaf, + struct fib6_info *rr_head, u32 metric, int oif, int strict, bool *do_rr) { - struct rt6_info *rt, *match, *cont; + struct fib6_info *rt, *match, *cont; int mpri = -1; match = NULL; cont = NULL; for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) { - if (rt->rt6i_metric != metric) { + if (rt->fib6_metric != metric) { cont = rt; break; } @@ -731,7 +707,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, for (rt = leaf; rt && rt != rr_head; rt = rcu_dereference(rt->rt6_next)) { - if (rt->rt6i_metric != metric) { + if (rt->fib6_metric != metric) { cont = rt; break; } @@ -748,16 +724,16 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, return match; } -static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, +static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, int oif, int strict) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); - struct rt6_info *match, *rt0; + struct fib6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *match, *rt0; bool do_rr = false; int key_plen; - if (!leaf || leaf == net->ipv6.ip6_null_entry) - return net->ipv6.ip6_null_entry; + if (!leaf || leaf == net->ipv6.fib6_null_entry) + return net->ipv6.fib6_null_entry; rt0 = rcu_dereference(fn->rr_ptr); if (!rt0) @@ -768,39 +744,39 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, * (This might happen if all routes under fn are deleted from * the tree and fib6_repair_tree() is called on the node.) */ - key_plen = rt0->rt6i_dst.plen; + key_plen = rt0->fib6_dst.plen; #ifdef CONFIG_IPV6_SUBTREES - if (rt0->rt6i_src.plen) - key_plen = rt0->rt6i_src.plen; + if (rt0->fib6_src.plen) + key_plen = rt0->fib6_src.plen; #endif if (fn->fn_bit != key_plen) - return net->ipv6.ip6_null_entry; + return net->ipv6.fib6_null_entry; - match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict, + match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict, &do_rr); if (do_rr) { - struct rt6_info *next = rcu_dereference(rt0->rt6_next); + struct fib6_info *next = rcu_dereference(rt0->rt6_next); /* no entries matched; do round-robin */ - if (!next || next->rt6i_metric != rt0->rt6i_metric) + if (!next || next->fib6_metric != rt0->fib6_metric) next = leaf; if (next != rt0) { - spin_lock_bh(&leaf->rt6i_table->tb6_lock); + spin_lock_bh(&leaf->fib6_table->tb6_lock); /* make sure next is not being deleted from the tree */ - if (next->rt6i_node) + if (next->fib6_node) rcu_assign_pointer(fn->rr_ptr, next); - spin_unlock_bh(&leaf->rt6i_table->tb6_lock); + spin_unlock_bh(&leaf->fib6_table->tb6_lock); } } - return match ? match : net->ipv6.ip6_null_entry; + return match ? 
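/*
 * [illustrative sketch, not part of the patch] rt6_select() above
 * round-robins among same-metric siblings: when the current head route had
 * to be skipped, fn->rr_ptr is advanced to the next route of equal metric,
 * wrapping back to the leaf. A toy rotation over an array:
 */
#include <stdio.h>

struct toy_rt { int metric; const char *name; };

/* Advance the round-robin index past 'head' among equal-metric entries. */
static int toy_advance_rr(const struct toy_rt *rts, int n, int head)
{
	int next = head + 1;

	if (next >= n || rts[next].metric != rts[head].metric)
		next = 0;		/* wrap to the leaf */
	return next;
}

int main(void)
{
	struct toy_rt rts[] = { {1024, "r0"}, {1024, "r1"}, {2048, "r2"} };
	int rr = 0;

	rr = toy_advance_rr(rts, 3, rr);	/* -> r1 */
	printf("%s\n", rts[rr].name);
	rr = toy_advance_rr(rts, 3, rr);	/* r2 differs in metric -> r0 */
	printf("%s\n", rts[rr].name);
	return 0;
}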
match : net->ipv6.fib6_null_entry; } -static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt) +static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt) { - return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); + return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -812,7 +788,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr prefix_buf, *prefix; unsigned int pref; unsigned long lifetime; - struct rt6_info *rt; + struct fib6_info *rt; if (len < sizeof(struct route_info)) { return -EINVAL; @@ -850,13 +826,13 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (rinfo->prefix_len == 0) - rt = rt6_get_dflt_router(gwaddr, dev); + rt = rt6_get_dflt_router(net, gwaddr, dev); else rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev); if (rt && !lifetime) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } @@ -864,21 +840,162 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev, pref); else if (rt) - rt->rt6i_flags = RTF_ROUTEINFO | - (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); + rt->fib6_flags = RTF_ROUTEINFO | + (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { if (!addrconf_finite_timeout(lifetime)) - rt6_clean_expires(rt); + fib6_clean_expires(rt); else - rt6_set_expires(rt, jiffies + HZ * lifetime); + fib6_set_expires(rt, jiffies + HZ * lifetime); - ip6_rt_put(rt); + fib6_info_release(rt); } return 0; } #endif +/* + * Misc support functions + */ + +/* called with rcu_lock held */ +static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) +{ + struct net_device *dev = rt->fib6_nh.nh_dev; + + if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { + /* for copies of local routes, dst->dev needs to be the + * device if it is a master device, the master device if + * device is enslaved, and the loopback as the default + */ + if (netif_is_l3_slave(dev) && + !rt6_need_strict(&rt->fib6_dst.addr)) + dev = l3mdev_master_dev_rcu(dev); + else if (!netif_is_l3_master(dev)) + dev = dev_net(dev)->loopback_dev; + /* last case is netif_is_l3_master(dev) is true in which + * case we want dev returned to be dev + */ + } + + return dev; +} + +static const int fib6_prop[RTN_MAX + 1] = { + [RTN_UNSPEC] = 0, + [RTN_UNICAST] = 0, + [RTN_LOCAL] = 0, + [RTN_BROADCAST] = 0, + [RTN_ANYCAST] = 0, + [RTN_MULTICAST] = 0, + [RTN_BLACKHOLE] = -EINVAL, + [RTN_UNREACHABLE] = -EHOSTUNREACH, + [RTN_PROHIBIT] = -EACCES, + [RTN_THROW] = -EAGAIN, + [RTN_NAT] = -EINVAL, + [RTN_XRESOLVE] = -EINVAL, +}; + +static int ip6_rt_type_to_error(u8 fib6_type) +{ + return fib6_prop[fib6_type]; +} + +static unsigned short fib6_info_dst_flags(struct fib6_info *rt) +{ + unsigned short flags = 0; + + if (rt->dst_nocount) + flags |= DST_NOCOUNT; + if (rt->dst_nopolicy) + flags |= DST_NOPOLICY; + if (rt->dst_host) + flags |= DST_HOST; + + return flags; +} + +static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) +{ + rt->dst.error = ip6_rt_type_to_error(ort->fib6_type); + + switch (ort->fib6_type) { + case RTN_BLACKHOLE: + rt->dst.output = dst_discard_out; + rt->dst.input = dst_discard; + break; + case RTN_PROHIBIT: + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; + break; + case RTN_THROW: + case RTN_UNREACHABLE: + default: + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; + break; + } +} + +static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info 
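/*
 * [illustrative sketch, not part of the patch] The fib6_prop[] table above
 * replaces the per-type switch that used to live in route creation: the
 * route type is stored once on the FIB entry and mapped to a dst error (and
 * input/output handlers) only when a dst is instantiated. The table idiom:
 */
#include <stdio.h>
#include <errno.h>

enum toy_rtn { TOY_UNICAST, TOY_BLACKHOLE, TOY_UNREACHABLE, TOY_PROHIBIT,
	       TOY_THROW, TOY_MAX };

static const int toy_prop[TOY_MAX] = {
	[TOY_UNICAST]     = 0,
	[TOY_BLACKHOLE]   = -EINVAL,
	[TOY_UNREACHABLE] = -EHOSTUNREACH,
	[TOY_PROHIBIT]    = -EACCES,
	[TOY_THROW]       = -EAGAIN,
};

int main(void)
{
	printf("%d %d\n", toy_prop[TOY_PROHIBIT], toy_prop[TOY_UNREACHABLE]);
	return 0;
}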
*ort) +{ + rt->dst.flags |= fib6_info_dst_flags(ort); + + if (ort->fib6_flags & RTF_REJECT) { + ip6_rt_init_dst_reject(rt, ort); + return; + } + + rt->dst.error = 0; + rt->dst.output = ip6_output; + + if (ort->fib6_type == RTN_LOCAL) { + rt->dst.input = ip6_input; + } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { + rt->dst.input = ip6_mc_input; + } else { + rt->dst.input = ip6_forward; + } + + if (ort->fib6_nh.nh_lwtstate) { + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); + lwtunnel_set_redirect(&rt->dst); + } + + rt->dst.lastuse = jiffies; +} + +static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) +{ + rt->rt6i_flags &= ~RTF_EXPIRES; + fib6_info_hold(from); + rcu_assign_pointer(rt->from, from); + dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); + if (from->fib6_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + refcount_inc(&from->fib6_metrics->refcnt); + } +} + +static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) +{ + struct net_device *dev = fib6_info_nh_dev(ort); + + ip6_rt_init_dst(rt, ort); + + rt->rt6i_dst = ort->fib6_dst; + rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; + rt->rt6i_gateway = ort->fib6_nh.nh_gw; + rt->rt6i_flags = ort->fib6_flags; + rt6_set_from(rt, ort); +#ifdef CONFIG_IPV6_SUBTREES + rt->rt6i_src = ort->fib6_src; +#endif + rt->rt6i_prefsrc = ort->fib6_prefsrc; + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); +} + static struct fib6_node* fib6_backtrack(struct fib6_node *fn, struct in6_addr *saddr) { @@ -914,14 +1031,29 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, return false; } +/* called with rcu_lock held */ +static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) +{ + unsigned short flags = fib6_info_dst_flags(rt); + struct net_device *dev = rt->fib6_nh.nh_dev; + struct rt6_info *nrt; + + nrt = ip6_dst_alloc(dev_net(dev), dev, flags); + if (nrt) + ip6_rt_copy_init(nrt, rt); + + return nrt; +} + static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { - struct rt6_info *rt, *rt_cache; + struct fib6_info *f6i; struct fib6_node *fn; + struct rt6_info *rt; if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) flags &= ~RT6_LOOKUP_F_IFACE; @@ -929,35 +1061,43 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - rt = rcu_dereference(fn->leaf); - if (!rt) { - rt = net->ipv6.ip6_null_entry; + f6i = rcu_dereference(fn->leaf); + if (!f6i) { + f6i = net->ipv6.fib6_null_entry; } else { - rt = rt6_device_match(net, rt, &fl6->saddr, + f6i = rt6_device_match(net, f6i, &fl6->saddr, fl6->flowi6_oif, flags); - if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, - skb, flags); + if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0) + f6i = rt6_multipath_select(net, f6i, fl6, + fl6->flowi6_oif, skb, flags); } - if (rt == net->ipv6.ip6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } - /* Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; - if (ip6_hold_safe(net, &rt, true)) - dst_use_noref(&rt->dst, jiffies); + /* Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { + if (ip6_hold_safe(net, 
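/*
 * [illustrative sketch, not part of the patch] rt6_set_from() above makes
 * the dst borrow the FIB entry's metrics block by reference instead of
 * copying it; non-default blocks are refcounted so either side can be torn
 * down first. Modeled with a plain counter:
 */
#include <stdio.h>

struct toy_metrics { int refcnt; unsigned int mtu; };

static struct toy_metrics toy_default_metrics = { 0, 0 };	/* never freed */

struct toy_fib { struct toy_metrics *m; };
struct toy_dst { struct toy_metrics *m; };

static void toy_dst_bind(struct toy_dst *dst, struct toy_fib *from)
{
	dst->m = from->m;			/* share, don't copy */
	if (from->m != &toy_default_metrics)
		from->m->refcnt++;		/* refcount_inc(&...->refcnt) */
}

int main(void)
{
	struct toy_metrics m = { 1, 1280 };
	struct toy_fib fib = { &m };
	struct toy_dst dst;

	toy_dst_bind(&dst, &fib);
	printf("mtu=%u refcnt=%d\n", dst.m->mtu, m.refcnt);	/* 1280, 2 */
	return 0;
}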
&rt, true)) + dst_use_noref(&rt->dst, jiffies); + } else if (f6i == net->ipv6.fib6_null_entry) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } else { + rt = ip6_create_rt_rcu(f6i); + if (!rt) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + } rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table, fl6); return rt; - } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, @@ -999,55 +1139,28 @@ EXPORT_SYMBOL(rt6_lookup); * Caller must hold dst before calling it. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct mx6_config *mxc, +static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { int err; struct fib6_table *table; - table = rt->rt6i_table; + table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mxc, extack); + err = fib6_add(&table->tb6_root, rt, info, extack); spin_unlock_bh(&table->tb6_lock); return err; } -int ip6_ins_rt(struct rt6_info *rt) +int ip6_ins_rt(struct net *net, struct fib6_info *rt) { - struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; - struct mx6_config mxc = { .mx = NULL, }; - - /* Hold dst to account for the reference from the fib6 tree */ - dst_hold(&rt->dst); - return __ip6_ins_rt(rt, &info, &mxc, NULL); -} - -/* called with rcu_lock held */ -static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) -{ - struct net_device *dev = rt->dst.dev; - - if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { - /* for copies of local routes, dst->dev needs to be the - * device if it is a master device, the master device if - * device is enslaved, and the loopback as the default - */ - if (netif_is_l3_slave(dev) && - !rt6_need_strict(&rt->rt6i_dst.addr)) - dev = l3mdev_master_dev_rcu(dev); - else if (!netif_is_l3_master(dev)) - dev = dev_net(dev)->loopback_dev; - /* last case is netif_is_l3_master(dev) is true in which - * case we want dev returned to be dev - */ - } + struct nl_info info = { .nl_net = net, }; - return dev; + return __ip6_ins_rt(rt, &info, NULL); } -static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, +static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort, const struct in6_addr *daddr, const struct in6_addr *saddr) { @@ -1058,26 +1171,20 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, * Clone the route. 
*/ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - - rcu_read_lock(); dev = ip6_rt_get_dev_rcu(ort); - rt = __ip6_dst_alloc(dev_net(dev), dev, 0); - rcu_read_unlock(); + rt = ip6_dst_alloc(dev_net(dev), dev, 0); if (!rt) return NULL; ip6_rt_copy_init(rt, ort); rt->rt6i_flags |= RTF_CACHE; - rt->rt6i_metric = 0; rt->dst.flags |= DST_HOST; rt->rt6i_dst.addr = *daddr; rt->rt6i_dst.plen = 128; if (!rt6_is_gw_or_nonexthop(ort)) { - if (ort->rt6i_dst.plen != 128 && - ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) + if (ort->fib6_dst.plen != 128 && + ipv6_addr_equal(&ort->fib6_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -1090,45 +1197,44 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, return rt; } -static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) +static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt) { + unsigned short flags = fib6_info_dst_flags(rt); struct net_device *dev; struct rt6_info *pcpu_rt; rcu_read_lock(); dev = ip6_rt_get_dev_rcu(rt); - pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags); + pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); rcu_read_unlock(); if (!pcpu_rt) return NULL; ip6_rt_copy_init(pcpu_rt, rt); - pcpu_rt->rt6i_protocol = rt->rt6i_protocol; pcpu_rt->rt6i_flags |= RTF_PCPU; return pcpu_rt; } /* It should be called with rcu_read_lock() acquired */ -static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt) { struct rt6_info *pcpu_rt, **p; p = this_cpu_ptr(rt->rt6i_pcpu); pcpu_rt = *p; - if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false)) - rt6_dst_from_metrics_check(pcpu_rt); + if (pcpu_rt) + ip6_hold_safe(NULL, &pcpu_rt, false); return pcpu_rt; } -static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_make_pcpu_route(struct net *net, + struct fib6_info *rt) { struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); if (!pcpu_rt) { - struct net *net = dev_net(rt->dst.dev); - dst_hold(&net->ipv6.ip6_null_entry->dst); return net->ipv6.ip6_null_entry; } @@ -1138,7 +1244,6 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) prev = cmpxchg(p, NULL, pcpu_rt); BUG_ON(prev); - rt6_dst_from_metrics_check(pcpu_rt); return pcpu_rt; } @@ -1158,9 +1263,8 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, return; net = dev_net(rt6_ex->rt6i->dst.dev); - rt6_ex->rt6i->rt6i_node = NULL; hlist_del_rcu(&rt6_ex->hlist); - rt6_release(rt6_ex->rt6i); + dst_release(&rt6_ex->rt6i->dst); kfree_rcu(rt6_ex, rcu); WARN_ON_ONCE(!bucket->depth); bucket->depth--; @@ -1268,20 +1372,36 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, return NULL; } +static unsigned int fib6_mtu(const struct fib6_info *rt) +{ + unsigned int mtu; + + if (rt->fib6_pmtu) { + mtu = rt->fib6_pmtu; + } else { + struct net_device *dev = fib6_info_nh_dev(rt); + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + } + + mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); + + return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu); +} + static int rt6_insert_exception(struct rt6_info *nrt, - struct rt6_info *ort) + struct fib6_info *ort) { - struct net *net = dev_net(ort->dst.dev); + struct net *net = dev_net(nrt->dst.dev); struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; int err = 0; - /* ort 
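/*
 * [illustrative sketch, not part of the patch] fib6_mtu() above derives the
 * MTU used when judging exception routes: a stored path MTU wins, otherwise
 * the device's IPv6 MTU applies, capped and reduced by any
 * lightweight-tunnel headroom. As plain arithmetic, with a stand-in cap:
 */
#include <stdio.h>

#define TOY_IP6_MAX_MTU 65535	/* stand-in for the kernel's IP6_MAX_MTU */

static unsigned int toy_fib6_mtu(unsigned int pmtu, unsigned int dev_mtu6,
				 unsigned int lwt_headroom)
{
	unsigned int mtu = pmtu ? pmtu : dev_mtu6;

	if (mtu > TOY_IP6_MAX_MTU)
		mtu = TOY_IP6_MAX_MTU;
	return mtu - lwt_headroom;
}

int main(void)
{
	printf("%u\n", toy_fib6_mtu(0, 1500, 8));	/* 1492 */
	printf("%u\n", toy_fib6_mtu(1280, 1500, 0));	/* 1280 */
	return 0;
}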
can't be a cache or pcpu route */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); - spin_lock_bh(&rt6_exception_lock); if (ort->exception_bucket_flushed) { @@ -1308,19 +1428,19 @@ static int rt6_insert_exception(struct rt6_info *nrt, * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (ort->rt6i_src.plen) + if (ort->fib6_src.plen) src_key = &nrt->rt6i_src.addr; #endif /* Update rt6i_prefsrc as it could be changed * in rt6_remove_prefsrc() */ - nrt->rt6i_prefsrc = ort->rt6i_prefsrc; + nrt->rt6i_prefsrc = ort->fib6_prefsrc; /* rt6_mtu_change() might lower mtu on ort. * Only insert this exception route if its mtu * is less than ort's mtu value. */ - if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) { + if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) { err = -EINVAL; goto out; } @@ -1337,8 +1457,6 @@ static int rt6_insert_exception(struct rt6_info *nrt, } rt6_ex->rt6i = nrt; rt6_ex->stamp = jiffies; - atomic_inc(&nrt->rt6i_ref); - nrt->rt6i_node = ort->rt6i_node; hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); bucket->depth++; net->ipv6.rt6_stats->fib_rt_cache++; @@ -1351,16 +1469,16 @@ out: /* Update fn->fn_sernum to invalidate all cached dst */ if (!err) { - spin_lock_bh(&ort->rt6i_table->tb6_lock); - fib6_update_sernum(ort); - spin_unlock_bh(&ort->rt6i_table->tb6_lock); + spin_lock_bh(&ort->fib6_table->tb6_lock); + fib6_update_sernum(net, ort); + spin_unlock_bh(&ort->fib6_table->tb6_lock); fib6_force_start_gc(net); } return err; } -void rt6_flush_exceptions(struct rt6_info *rt) +void rt6_flush_exceptions(struct fib6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1390,7 +1508,7 @@ out: /* Find cached rt in the hash table inside passed in rt * Caller has to hold rcu_read_lock() */ -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -1408,7 +1526,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (rt->rt6i_src.plen) + if (rt->fib6_src.plen) src_key = saddr; #endif rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); @@ -1420,14 +1538,15 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, } /* Remove the passed in cached rt from the hash table that contains it */ -int rt6_remove_exception_rt(struct rt6_info *rt) +static int rt6_remove_exception_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; + struct fib6_info *from; int err; + from = rcu_dereference(rt->from); if (!from || !(rt->rt6i_flags & RTF_CACHE)) return -EINVAL; @@ -1445,7 +1564,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt) * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. 
*/ - if (from->rt6i_src.plen) + if (from->fib6_src.plen) src_key = &rt->rt6i_src.addr; #endif rt6_ex = __rt6_find_exception_spinlock(&bucket, @@ -1468,7 +1587,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt) static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; @@ -1486,7 +1605,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (from->rt6i_src.plen) + if (from->fib6_src.plen) src_key = &rt->rt6i_src.addr; #endif rt6_ex = __rt6_find_exception_rcu(&bucket, @@ -1498,7 +1617,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) rcu_read_unlock(); } -static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) +static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1540,7 +1659,7 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, } static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, - struct rt6_info *rt, int mtu) + struct fib6_info *rt, int mtu) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1557,12 +1676,12 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, struct rt6_info *entry = rt6_ex->rt6i; /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected - * route), the metrics of its rt->dst.from have already + * route), the metrics of its rt->from have already * been updated. */ - if (entry->rt6i_pmtu && + if (dst_metric_raw(&entry->dst, RTAX_MTU) && rt6_mtu_change_route_allowed(idev, entry, mtu)) - entry->rt6i_pmtu = mtu; + dst_metric_set(&entry->dst, RTAX_MTU, mtu); } bucket++; } @@ -1570,7 +1689,7 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) -static void rt6_exceptions_clean_tohost(struct rt6_info *rt, +static void rt6_exceptions_clean_tohost(struct fib6_info *rt, struct in6_addr *gateway) { struct rt6_exception_bucket *bucket; @@ -1649,7 +1768,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, gc_args->more++; } -void rt6_age_exceptions(struct rt6_info *rt, +void rt6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, unsigned long now) { @@ -1685,7 +1804,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, const struct sk_buff *skb, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *rt_cache; + struct fib6_info *f6i; + struct rt6_info *rt; int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -1702,10 +1822,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, oif = 0; redo_rt6_select: - rt = rt6_select(net, fn, oif, strict); - if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); - if (rt == net->ipv6.ip6_null_entry) { + f6i = rt6_select(net, fn, oif, strict); + if (f6i->fib6_nsiblings) + f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict); + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; @@ -1717,45 +1837,35 @@ redo_rt6_select: } } - /*Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; - - if (rt == net->ipv6.ip6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { + rt = 
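/*
 * [illustrative sketch, not part of the patch] Every exception-table helper
 * above picks its hash key the same way: destination address only, unless
 * the owning route is source-specific (CONFIG_IPV6_SUBTREES), in which case
 * the source address joins the key. The selection, distilled to toy keys:
 */
#include <stdio.h>
#include <string.h>

struct toy_key { unsigned int dst; unsigned int src; };

static struct toy_key toy_exception_key(unsigned int daddr,
					const unsigned int *saddr,
					int src_plen)
{
	struct toy_key key;

	memset(&key, 0, sizeof(key));
	key.dst = daddr;
	if (src_plen && saddr)		/* source-specific parent route */
		key.src = *saddr;
	return key;
}

int main(void)
{
	unsigned int saddr = 0x20010db8;
	struct toy_key a = toy_exception_key(0xfe800001, &saddr, 0);
	struct toy_key b = toy_exception_key(0xfe800001, &saddr, 64);

	printf("a=(%x,%x) b=(%x,%x)\n", a.dst, a.src, b.dst, b.src);
	return 0;
}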
net->ipv6.ip6_null_entry; rcu_read_unlock(); dst_hold(&rt->dst); trace_fib6_table_lookup(net, rt, table, fl6); return rt; - } else if (rt->rt6i_flags & RTF_CACHE) { - if (ip6_hold_safe(net, &rt, true)) { + } + + /*Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { + if (ip6_hold_safe(net, &rt, true)) dst_use_noref(&rt->dst, jiffies); - rt6_dst_from_metrics_check(rt); - } + rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !(rt->rt6i_flags & RTF_GATEWAY))) { + !(f6i->fib6_flags & RTF_GATEWAY))) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different * from the fl6->daddr used to look-up route here. */ - struct rt6_info *uncached_rt; - if (ip6_hold_safe(net, &rt, true)) { - dst_use_noref(&rt->dst, jiffies); - } else { - rcu_read_unlock(); - uncached_rt = rt; - goto uncached_rt_out; - } - rcu_read_unlock(); + uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); - uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); - dst_release(&rt->dst); + rcu_read_unlock(); if (uncached_rt) { /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() @@ -1768,7 +1878,6 @@ redo_rt6_select: dst_hold(&uncached_rt->dst); } -uncached_rt_out: trace_fib6_table_lookup(net, uncached_rt, table, fl6); return uncached_rt; @@ -1777,24 +1886,12 @@ uncached_rt_out: struct rt6_info *pcpu_rt; - dst_use_noref(&rt->dst, jiffies); local_bh_disable(); - pcpu_rt = rt6_get_pcpu_route(rt); - - if (!pcpu_rt) { - /* atomic_inc_not_zero() is needed when using rcu */ - if (atomic_inc_not_zero(&rt->rt6i_ref)) { - /* No dst_hold() on rt is needed because grabbing - * rt->rt6i_ref makes sure rt can't be released. 
- */ - pcpu_rt = rt6_make_pcpu_route(rt); - rt6_release(rt); - } else { - /* rt is already removed from tree */ - pcpu_rt = net->ipv6.ip6_null_entry; - dst_hold(&pcpu_rt->dst); - } - } + pcpu_rt = rt6_get_pcpu_route(f6i); + + if (!pcpu_rt) + pcpu_rt = rt6_make_pcpu_route(net, f6i); + local_bh_enable(); rcu_read_unlock(); trace_fib6_table_lookup(net, pcpu_rt, table, fl6); @@ -2020,7 +2117,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_idev = in6_dev_get(loopback_dev); rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; - rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); #ifdef CONFIG_IPV6_SUBTREES @@ -2036,18 +2132,27 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ -static void rt6_dst_from_metrics_check(struct rt6_info *rt) +static bool fib6_check(struct fib6_info *f6i, u32 cookie) { - if (rt->from && - dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst)) - dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true); + u32 rt_cookie = 0; + + if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie) + return false; + + if (fib6_check_expired(f6i)) + return false; + + return true; } -static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) +static struct dst_entry *rt6_check(struct rt6_info *rt, + struct fib6_info *from, + u32 cookie) { u32 rt_cookie = 0; - if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) + if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) || + rt_cookie != cookie) return NULL; if (rt6_check_expired(rt)) @@ -2056,11 +2161,13 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) return &rt->dst; } -static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) +static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, + struct fib6_info *from, + u32 cookie) { if (!__rt6_check_expired(rt) && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && - rt6_check(rt->from, cookie)) + fib6_check(from, cookie)) return &rt->dst; else return NULL; @@ -2068,22 +2175,30 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { + struct dst_entry *dst_ret; + struct fib6_info *from; struct rt6_info *rt; - rt = (struct rt6_info *) dst; + rt = container_of(dst, struct rt6_info, dst); + + rcu_read_lock(); /* All IPV6 dsts are created with ->obsolete set to the value * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. 
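/*
 * [illustrative sketch, not part of the patch] fib6_check()/rt6_check()
 * above validate a cached dst against the FIB: the tree serial number
 * ("cookie") taken when the dst was created must still match, and the entry
 * must not have expired. The check in miniature:
 */
#include <stdio.h>

struct toy_fib { unsigned int sernum; unsigned long expires; int has_expiry; };

static int toy_fib6_check(const struct toy_fib *f, unsigned int cookie,
			  unsigned long now)
{
	if (!f || f->sernum != cookie)
		return 0;	/* tree changed underneath us: dst is stale */
	if (f->has_expiry && now > f->expires)
		return 0;	/* route timed out */
	return 1;
}

int main(void)
{
	struct toy_fib f = { 7, 1000, 1 };

	printf("%d %d %d\n",
	       toy_fib6_check(&f, 7, 500),	/* 1: still valid */
	       toy_fib6_check(&f, 6, 500),	/* 0: stale cookie */
	       toy_fib6_check(&f, 7, 2000));	/* 0: expired */
	return 0;
}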
*/ - rt6_dst_from_metrics_check(rt); + from = rcu_dereference(rt->from); - if (rt->rt6i_flags & RTF_PCPU || - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) - return rt6_dst_from_check(rt, cookie); + if (from && (rt->rt6i_flags & RTF_PCPU || + unlikely(!list_empty(&rt->rt6i_uncached)))) + dst_ret = rt6_dst_from_check(rt, from, cookie); else - return rt6_check(rt, cookie); + dst_ret = rt6_check(rt, from, cookie); + + rcu_read_unlock(); + + return dst_ret; } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -2092,10 +2207,12 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) { + rcu_read_lock(); if (rt6_check_expired(rt)) { - ip6_del_rt(rt); + rt6_remove_exception_rt(rt); dst = NULL; } + rcu_read_unlock(); } else { dst_release(dst); dst = NULL; @@ -2112,35 +2229,60 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { + rcu_read_lock(); if (rt->rt6i_flags & RTF_CACHE) { if (dst_hold_safe(&rt->dst)) - ip6_del_rt(rt); + rt6_remove_exception_rt(rt); } else { + struct fib6_info *from; struct fib6_node *fn; - rcu_read_lock(); - fn = rcu_dereference(rt->rt6i_node); - if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; - rcu_read_unlock(); + from = rcu_dereference(rt->from); + if (from) { + fn = rcu_dereference(from->fib6_node); + if (fn && (rt->rt6i_flags & RTF_DEFAULT)) + fn->fn_sernum = -1; + } } + rcu_read_unlock(); } } +static void rt6_update_expires(struct rt6_info *rt0, int timeout) +{ + if (!(rt0->rt6i_flags & RTF_EXPIRES)) { + struct fib6_info *from; + + rcu_read_lock(); + from = rcu_dereference(rt0->from); + if (from) + rt0->dst.expires = from->expires; + rcu_read_unlock(); + } + + dst_set_expires(&rt0->dst, timeout); + rt0->rt6i_flags |= RTF_EXPIRES; +} + static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) { struct net *net = dev_net(rt->dst.dev); + dst_metric_set(&rt->dst, RTAX_MTU, mtu); rt->rt6i_flags |= RTF_MODIFIED; - rt->rt6i_pmtu = mtu; rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); } static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { + bool from_set; + + rcu_read_lock(); + from_set = !!rcu_dereference(rt->from); + rcu_read_unlock(); + return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || - rcu_access_pointer(rt->rt6i_node)); + (rt->rt6i_flags & RTF_PCPU || from_set); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, @@ -2176,14 +2318,18 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (rt6->rt6i_flags & RTF_CACHE) rt6_update_exception_stamp_rt(rt6); } else if (daddr) { + struct fib6_info *from; struct rt6_info *nrt6; - nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); + rcu_read_lock(); + from = rcu_dereference(rt6->from); + nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); - if (rt6_insert_exception(nrt6, rt6)) + if (rt6_insert_exception(nrt6, from)) dst_release_immediate(&nrt6->dst); } + rcu_read_unlock(); } } @@ -2264,7 +2410,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt, *rt_cache; + struct rt6_info *ret = NULL, *rt_cache; + struct fib6_info *rt; struct fib6_node *fn; /* Get the "current" route for this destination and @@ -2281,29 +2428,29 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl6->daddr, 
&fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) continue; - if (rt6_check_expired(rt)) + if (fib6_check_expired(rt)) continue; - if (rt->dst.error) + if (rt->fib6_flags & RTF_REJECT) break; - if (!(rt->rt6i_flags & RTF_GATEWAY)) + if (!(rt->fib6_flags & RTF_GATEWAY)) continue; - if (fl6->flowi6_oif != rt->dst.dev->ifindex) + if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex) continue; /* rt_cache's gateway might be different from its 'parent' * in the case of an ip redirect. * So we keep searching in the exception table if the gateway * is different. */ - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { + if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) { rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); if (rt_cache && ipv6_addr_equal(&rdfl->gateway, &rt_cache->rt6i_gateway)) { - rt = rt_cache; + ret = rt_cache; break; } continue; @@ -2312,25 +2459,28 @@ restart: } if (!rt) - rt = net->ipv6.ip6_null_entry; - else if (rt->dst.error) { - rt = net->ipv6.ip6_null_entry; + rt = net->ipv6.fib6_null_entry; + else if (rt->fib6_flags & RTF_REJECT) { + ret = net->ipv6.ip6_null_entry; goto out; } - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } out: - ip6_hold_safe(net, &rt, true); + if (ret) + dst_hold(&ret->dst); + else + ret = ip6_create_rt_rcu(rt); rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); - return rt; + trace_fib6_table_lookup(net, ret, table, fl6); + return ret; }; static struct dst_entry *ip6_route_redirect(struct net *net, @@ -2422,12 +2572,8 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - - if (mtu) - goto out; + unsigned int mtu; mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) @@ -2511,60 +2657,22 @@ out: return entries > rt_max_size; } -static int ip6_convert_metrics(struct mx6_config *mxc, - const struct fib6_config *cfg) +static int ip6_convert_metrics(struct net *net, struct fib6_info *rt, + struct fib6_config *cfg) { - struct net *net = cfg->fc_nlinfo.nl_net; - bool ecn_ca = false; - struct nlattr *nla; - int remaining; - u32 *mp; + struct dst_metrics *p; if (!cfg->fc_mx) return 0; - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (unlikely(!mp)) + p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL); + if (unlikely(!p)) return -ENOMEM; - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - u32 val; - - if (!type) - continue; - if (unlikely(type > RTAX_MAX)) - goto err; - - if (type == RTAX_CC_ALGO) { - char tmp[TCP_CA_NAME_MAX]; - - nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); - if (val == TCP_CA_UNSPEC) - goto err; - } else { - val = nla_get_u32(nla); - } - if (type == RTAX_HOPLIMIT && val > 255) - val = 255; - if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) - goto err; + refcount_set(&p->refcnt, 1); + rt->fib6_metrics = p; - mp[type - 1] = val; - __set_bit(type - 1, mxc->mx_valid); - } - - if (ecn_ca) { - __set_bit(RTAX_FEATURES - 1, mxc->mx_valid); - mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; - } - - mxc->mx = mp; - return 0; - err: - kfree(mp); - return -EINVAL; + return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics); } static struct 
rt6_info *ip6_nh_lookup_table(struct net *net, @@ -2750,11 +2858,12 @@ out: return err; } -static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, +static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, + gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; @@ -2773,6 +2882,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } + if (cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); + goto out; + } + if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; @@ -2831,35 +2945,30 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, - (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); + err = -ENOMEM; + rt = fib6_info_alloc(gfp_flags); + if (!rt) + goto out; - if (!rt) { - err = -ENOMEM; + if (cfg->fc_flags & RTF_ADDRCONF) + rt->dst_nocount = true; + + err = ip6_convert_metrics(net, rt, cfg); + if (err < 0) goto out; - } if (cfg->fc_flags & RTF_EXPIRES) - rt6_set_expires(rt, jiffies + + fib6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); else - rt6_clean_expires(rt); + fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; - rt->rt6i_protocol = cfg->fc_protocol; + rt->fib6_protocol = cfg->fc_protocol; addr_type = ipv6_addr_type(&cfg->fc_dst); - if (addr_type & IPV6_ADDR_MULTICAST) - rt->dst.input = ip6_mc_input; - else if (cfg->fc_flags & RTF_LOCAL) - rt->dst.input = ip6_input; - else - rt->dst.input = ip6_forward; - - rt->dst.output = ip6_output; - if (cfg->fc_encap) { struct lwtunnel_state *lwtstate; @@ -2868,22 +2977,23 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, &lwtstate, extack); if (err) goto out; - rt->dst.lwtstate = lwtstate_get(lwtstate); - lwtunnel_set_redirect(&rt->dst); + rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate); } - ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); - rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) - rt->dst.flags |= DST_HOST; + ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); + rt->fib6_dst.plen = cfg->fc_dst_len; + if (rt->fib6_dst.plen == 128) + rt->dst_host = true; #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); - rt->rt6i_src.plen = cfg->fc_src_len; + ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); + rt->fib6_src.plen = cfg->fc_src_len; #endif - rt->rt6i_metric = cfg->fc_metric; - rt->rt6i_nh_weight = 1; + rt->fib6_metric = cfg->fc_metric; + rt->fib6_nh.nh_weight = 1; + + rt->fib6_type = cfg->fc_type; /* We cannot add true routes via loopback here, they would result in kernel looping; promote them to reject routes @@ -2906,28 +3016,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } } - rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; - switch (cfg->fc_type) { - case RTN_BLACKHOLE: - rt->dst.error = -EINVAL; - rt->dst.output = dst_discard_out; - rt->dst.input = dst_discard; - break; - case RTN_PROHIBIT: - rt->dst.error = -EACCES; - rt->dst.output = ip6_pkt_prohibit_out; - rt->dst.input = ip6_pkt_prohibit; - break; - case RTN_THROW: - case RTN_UNREACHABLE: - default: - rt->dst.error = (cfg->fc_type == RTN_THROW) ? 
-EAGAIN - : (cfg->fc_type == RTN_UNREACHABLE) - ? -EHOSTUNREACH : -ENETUNREACH; - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; - break; - } + rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP; goto install_route; } @@ -2936,7 +3025,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (err) goto out; - rt->rt6i_gateway = cfg->fc_gateway; + rt->fib6_nh.nh_gw = cfg->fc_gateway; } err = -ENODEV; @@ -2961,96 +3050,82 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, err = -EINVAL; goto out; } - rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; - rt->rt6i_prefsrc.plen = 128; + rt->fib6_prefsrc.addr = cfg->fc_prefsrc; + rt->fib6_prefsrc.plen = 128; } else - rt->rt6i_prefsrc.plen = 0; + rt->fib6_prefsrc.plen = 0; - rt->rt6i_flags = cfg->fc_flags; + rt->fib6_flags = cfg->fc_flags; install_route: - if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && + if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) && !netif_carrier_ok(dev)) - rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; - rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); - rt->dst.dev = dev; - rt->rt6i_idev = idev; - rt->rt6i_table = table; + rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); + rt->fib6_nh.nh_dev = dev; + rt->fib6_table = table; cfg->fc_nlinfo.nl_net = dev_net(dev); + if (idev) + in6_dev_put(idev); + return rt; out: if (dev) dev_put(dev); if (idev) in6_dev_put(idev); - if (rt) - dst_release_immediate(&rt->dst); + fib6_info_release(rt); return ERR_PTR(err); } -int ip6_route_add(struct fib6_config *cfg, +int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { - struct mx6_config mxc = { .mx = NULL, }; - struct rt6_info *rt; + struct fib6_info *rt; int err; - rt = ip6_route_info_create(cfg, extack); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - rt = NULL; - goto out; - } + rt = ip6_route_info_create(cfg, gfp_flags, extack); + if (IS_ERR(rt)) + return PTR_ERR(rt); - err = ip6_convert_metrics(&mxc, cfg); - if (err) - goto out; - - err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack); - - kfree(mxc.mx); - - return err; -out: - if (rt) - dst_release_immediate(&rt->dst); + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); + fib6_info_release(rt); return err; } -static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info) { - int err; + struct net *net = info->nl_net; struct fib6_table *table; - struct net *net = dev_net(rt->dst.dev); + int err; - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.fib6_null_entry) { err = -ENOENT; goto out; } - table = rt->rt6i_table; + table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); spin_unlock_bh(&table->tb6_lock); out: - ip6_rt_put(rt); + fib6_info_release(rt); return err; } -int ip6_del_rt(struct rt6_info *rt) +int ip6_del_rt(struct net *net, struct fib6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; + struct nl_info info = { .nl_net = net }; + return __ip6_del_rt(rt, &info); } -static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) +static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) { struct nl_info *info = &cfg->fc_nlinfo; struct net *net = info->nl_net; @@ -3058,20 +3133,20 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) struct fib6_table *table; int err = -ENOENT; - if (rt == net->ipv6.ip6_null_entry) + if (rt == 
net->ipv6.fib6_null_entry) goto out_put; - table = rt->rt6i_table; + table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); - if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { - struct rt6_info *sibling, *next_sibling; + if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) { + struct fib6_info *sibling, *next_sibling; /* prefer to send a single notification with all hops */ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (skb) { u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; - if (rt6_fill_node(net, skb, rt, + if (rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, RTM_DELROUTE, info->portid, seq, 0) < 0) { kfree_skb(skb); @@ -3081,8 +3156,8 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) } list_for_each_entry_safe(sibling, next_sibling, - &rt->rt6i_siblings, - rt6i_siblings) { + &rt->fib6_siblings, + fib6_siblings) { err = fib6_del(sibling, info); if (err) goto out_unlock; @@ -3093,7 +3168,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) out_unlock: spin_unlock_bh(&table->tb6_lock); out_put: - ip6_rt_put(rt); + fib6_info_release(rt); if (skb) { rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, @@ -3102,11 +3177,28 @@ out_put: return err; } +static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) +{ + int rc = -ESRCH; + + if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) + goto out; + + if (cfg->fc_flags & RTF_GATEWAY && + !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + goto out; + if (dst_hold_safe(&rt->dst)) + rc = rt6_remove_exception_rt(rt); +out: + return rc; +} + static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { - struct rt6_info *rt, *rt_cache; + struct rt6_info *rt_cache; struct fib6_table *table; + struct fib6_info *rt; struct fib6_node *fn; int err = -ESRCH; @@ -3126,25 +3218,29 @@ static int ip6_route_del(struct fib6_config *cfg, if (fn) { for_each_fib6_node_rt_rcu(fn) { if (cfg->fc_flags & RTF_CACHE) { + int rc; + rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, &cfg->fc_src); - if (!rt_cache) - continue; - rt = rt_cache; + if (rt_cache) { + rc = ip6_del_cached_rt(rt_cache, cfg); + if (rc != -ESRCH) + return rc; + } + continue; } if (cfg->fc_ifindex && - (!rt->dst.dev || - rt->dst.dev->ifindex != cfg->fc_ifindex)) + (!rt->fib6_nh.nh_dev || + rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && - !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw)) continue; - if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) + if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) continue; - if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) + if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol) continue; - if (!dst_hold_safe(&rt->dst)) - break; + fib6_info_hold(rt); rcu_read_unlock(); /* if gateway was specified only delete the one hop */ @@ -3166,6 +3262,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu struct ndisc_options ndopts; struct inet6_dev *in6_dev; struct neighbour *neigh; + struct fib6_info *from; struct rd_msg *msg; int optlen, on_link; u8 *lladdr; @@ -3247,7 +3344,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)), NDISC_REDIRECT, &ndopts); - nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); + rcu_read_lock(); + from = rcu_dereference(rt->from); + fib6_info_hold(from); + rcu_read_unlock(); + + nrt = 
ip6_rt_cache_alloc(from, &msg->dest, NULL); if (!nrt) goto out; @@ -3255,14 +3357,13 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - nrt->rt6i_protocol = RTPROT_REDIRECT; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; /* No need to remove rt from the exception table if rt is * a cached route because rt6_insert_exception() will * takes care of it */ - if (rt6_insert_exception(nrt, rt)) { + if (rt6_insert_exception(nrt, from)) { dst_release_immediate(&nrt->dst); goto out; } @@ -3274,47 +3375,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); out: + fib6_info_release(from); neigh_release(neigh); } -/* - * Misc support functions - */ - -static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ - BUG_ON(from->from); - - rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(&from->dst); - rt->from = from; - dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); -} - -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) -{ - rt->dst.input = ort->dst.input; - rt->dst.output = ort->dst.output; - rt->rt6i_dst = ort->rt6i_dst; - rt->dst.error = ort->dst.error; - rt->rt6i_idev = ort->rt6i_idev; - if (rt->rt6i_idev) - in6_dev_hold(rt->rt6i_idev); - rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags; - rt6_set_from(rt, ort); - rt->rt6i_metric = ort->rt6i_metric; -#ifdef CONFIG_IPV6_SUBTREES - rt->rt6i_src = ort->rt6i_src; -#endif - rt->rt6i_prefsrc = ort->rt6i_prefsrc; - rt->rt6i_table = ort->rt6i_table; - rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); -} - #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev) @@ -3322,7 +3388,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, u32 tb_id = l3mdev_fib_table(dev) ? 
: RT6_TABLE_INFO; int ifindex = dev->ifindex; struct fib6_node *fn; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct fib6_table *table; table = fib6_get_table(net, tb_id); @@ -3335,13 +3401,13 @@ static struct rt6_info *rt6_get_route_info(struct net *net, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->dst.dev->ifindex != ifindex) + if (rt->fib6_nh.nh_dev->ifindex != ifindex) continue; - if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) + if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; - if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) + if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr)) continue; - ip6_hold_safe(NULL, &rt, false); + fib6_info_hold(rt); break; } out: @@ -3349,7 +3415,7 @@ out: return rt; } -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, @@ -3362,6 +3428,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), .fc_protocol = RTPROT_RA, + .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, @@ -3375,36 +3442,39 @@ static struct rt6_info *rt6_add_route_info(struct net *net, if (!prefixlen) cfg.fc_flags |= RTF_DEFAULT; - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, GFP_ATOMIC, NULL); return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } #endif -struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) +struct fib6_info *rt6_get_dflt_router(struct net *net, + const struct in6_addr *addr, + struct net_device *dev) { u32 tb_id = l3mdev_fib_table(dev) ? 
: RT6_TABLE_DFLT; - struct rt6_info *rt; + struct fib6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), tb_id); + table = fib6_get_table(net, tb_id); if (!table) return NULL; rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (dev == rt->dst.dev && - ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && - ipv6_addr_equal(&rt->rt6i_gateway, addr)) + if (dev == rt->fib6_nh.nh_dev && + ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && + ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr)) break; } if (rt) - ip6_hold_safe(NULL, &rt, false); + fib6_info_hold(rt); rcu_read_unlock(); return rt; } -struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, +struct fib6_info *rt6_add_dflt_router(struct net *net, + const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { @@ -3415,14 +3485,15 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), .fc_protocol = RTPROT_RA, + .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = dev_net(dev), + .fc_nlinfo.nl_net = net, }; cfg.fc_gateway = *gwaddr; - if (!ip6_route_add(&cfg, NULL)) { + if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) { struct fib6_table *table; table = fib6_get_table(dev_net(dev), cfg.fc_table); @@ -3430,24 +3501,25 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; } - return rt6_get_dflt_router(gwaddr, dev); + return rt6_get_dflt_router(net, gwaddr, dev); } -static void __rt6_purge_dflt_routers(struct fib6_table *table) +static void __rt6_purge_dflt_routers(struct net *net, + struct fib6_table *table) { - struct rt6_info *rt; + struct fib6_info *rt; restart: rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && - (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { - if (dst_hold_safe(&rt->dst)) { - rcu_read_unlock(); - ip6_del_rt(rt); - } else { - rcu_read_unlock(); - } + struct net_device *dev = fib6_info_nh_dev(rt); + struct inet6_dev *idev = dev ? 
__in6_dev_get(dev) : NULL; + + if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!idev || idev->cnf.accept_ra != 2)) { + fib6_info_hold(rt); + rcu_read_unlock(); + ip6_del_rt(net, rt); goto restart; } } @@ -3468,7 +3540,7 @@ void rt6_purge_dflt_routers(struct net *net) head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, head, tb6_hlist) { if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) - __rt6_purge_dflt_routers(table); + __rt6_purge_dflt_routers(net, table); } } @@ -3489,6 +3561,7 @@ static void rtmsg_to_fib6_config(struct net *net, cfg->fc_dst_len = rtmsg->rtmsg_dst_len; cfg->fc_src_len = rtmsg->rtmsg_src_len; cfg->fc_flags = rtmsg->rtmsg_flags; + cfg->fc_type = rtmsg->rtmsg_type; cfg->fc_nlinfo.nl_net = net; @@ -3518,7 +3591,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&cfg, NULL); + err = ip6_route_add(&cfg, GFP_KERNEL, NULL); break; case SIOCDELRT: err = ip6_route_del(&cfg, NULL); @@ -3546,7 +3619,8 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { - IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IP6_INC_STATS(dev_net(dst->dev), + __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INADDRERRORS); break; } @@ -3587,40 +3661,40 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff * Allocate a dst for local (unicast / anycast) address. */ -struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - const struct in6_addr *addr, - bool anycast) +struct fib6_info *addrconf_f6i_alloc(struct net *net, + struct inet6_dev *idev, + const struct in6_addr *addr, + bool anycast, gfp_t gfp_flags) { u32 tb_id; - struct net *net = dev_net(idev->dev); struct net_device *dev = idev->dev; - struct rt6_info *rt; + struct fib6_info *f6i; - rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); - if (!rt) + f6i = fib6_info_alloc(gfp_flags); + if (!f6i) return ERR_PTR(-ENOMEM); - in6_dev_hold(idev); - - rt->dst.flags |= DST_HOST; - rt->dst.input = ip6_input; - rt->dst.output = ip6_output; - rt->rt6i_idev = idev; - - rt->rt6i_protocol = RTPROT_KERNEL; - rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; - if (anycast) - rt->rt6i_flags |= RTF_ANYCAST; - else - rt->rt6i_flags |= RTF_LOCAL; + f6i->dst_nocount = true; + f6i->dst_host = true; + f6i->fib6_protocol = RTPROT_KERNEL; + f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP; + if (anycast) { + f6i->fib6_type = RTN_ANYCAST; + f6i->fib6_flags |= RTF_ANYCAST; + } else { + f6i->fib6_type = RTN_LOCAL; + f6i->fib6_flags |= RTF_LOCAL; + } - rt->rt6i_gateway = *addr; - rt->rt6i_dst.addr = *addr; - rt->rt6i_dst.plen = 128; + f6i->fib6_nh.nh_gw = *addr; + dev_hold(dev); + f6i->fib6_nh.nh_dev = dev; + f6i->fib6_dst.addr = *addr; + f6i->fib6_dst.plen = 128; tb_id = l3mdev_fib_table(idev->dev) ? 
: RT6_TABLE_LOCAL; - rt->rt6i_table = fib6_get_table(net, tb_id); + f6i->fib6_table = fib6_get_table(net, tb_id); - return rt; + return f6i; } /* remove deleted ip from prefsrc entries */ @@ -3630,18 +3704,18 @@ struct arg_dev_net_ip { struct in6_addr *addr; }; -static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) +static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) { struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->dst.dev == dev || !dev) && - rt != net->ipv6.ip6_null_entry && - ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { + if (((void *)rt->fib6_nh.nh_dev == dev || !dev) && + rt != net->ipv6.fib6_null_entry && + ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); /* remove prefsrc entry */ - rt->rt6i_prefsrc.plen = 0; + rt->fib6_prefsrc.plen = 0; /* need to update cache as well */ rt6_exceptions_remove_prefsrc(rt); spin_unlock_bh(&rt6_exception_lock); @@ -3663,12 +3737,12 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) /* Remove routers and update dst entries when gateway turn into host. */ -static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +static int fib6_clean_tohost(struct fib6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; - if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && - ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && + ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) { return -1; } @@ -3694,85 +3768,85 @@ struct arg_netdev_event { }; }; -static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) +static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; struct fib6_node *fn; - fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&rt->fib6_table->tb6_lock)); iter = rcu_dereference_protected(fn->leaf, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); while (iter) { - if (iter->rt6i_metric == rt->rt6i_metric && + if (iter->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(iter)) return iter; iter = rcu_dereference_protected(iter->rt6_next, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); } return NULL; } -static bool rt6_is_dead(const struct rt6_info *rt) +static bool rt6_is_dead(const struct fib6_info *rt) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD || - (rt->rt6i_nh_flags & RTNH_F_LINKDOWN && - rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD || + (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && + fib6_ignore_linkdown(rt))) return true; return false; } -static int rt6_multipath_total_weight(const struct rt6_info *rt) +static int rt6_multipath_total_weight(const struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; int total = 0; if (!rt6_is_dead(rt)) - total += rt->rt6i_nh_weight; + total += rt->fib6_nh.nh_weight; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { if (!rt6_is_dead(iter)) - total += iter->rt6i_nh_weight; + total += iter->fib6_nh.nh_weight; } return total; } -static void 
rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total) +static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) { int upper_bound = -1; if (!rt6_is_dead(rt)) { - *weight += rt->rt6i_nh_weight; + *weight += rt->fib6_nh.nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, total) - 1; } - atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); + atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound); } -static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) +static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) { - struct rt6_info *iter; + struct fib6_info *iter; int weight = 0; rt6_upper_bound_set(rt, &weight, total); - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) rt6_upper_bound_set(iter, &weight, total); } -void rt6_multipath_rebalance(struct rt6_info *rt) +void rt6_multipath_rebalance(struct fib6_info *rt) { - struct rt6_info *first; + struct fib6_info *first; int total; /* In case the entire multipath route was marked for flushing, * then there is no need to rebalance upon the removal of every * sibling route. */ - if (!rt->rt6i_nsiblings || rt->should_flush) + if (!rt->fib6_nsiblings || rt->should_flush) return; /* During lookup routes are evaluated in order, so we need to @@ -3787,14 +3861,14 @@ void rt6_multipath_rebalance(struct rt6_info *rt) rt6_multipath_upper_bound_set(first, total); } -static int fib6_ifup(struct rt6_info *rt, void *p_arg) +static int fib6_ifup(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; - const struct net *net = dev_net(arg->dev); + struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { - rt->rt6i_nh_flags &= ~arg->nh_flags; - fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); + if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) { + rt->fib6_nh.nh_flags &= ~arg->nh_flags; + fib6_update_sernum_upto_root(net, rt); rt6_multipath_rebalance(rt); } @@ -3816,95 +3890,96 @@ void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) fib6_clean_all(dev_net(dev), fib6_ifup, &arg); } -static bool rt6_multipath_uses_dev(const struct rt6_info *rt, +static bool rt6_multipath_uses_dev(const struct fib6_info *rt, const struct net_device *dev) { - struct rt6_info *iter; + struct fib6_info *iter; - if (rt->dst.dev == dev) + if (rt->fib6_nh.nh_dev == dev) return true; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == dev) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) + if (iter->fib6_nh.nh_dev == dev) return true; return false; } -static void rt6_multipath_flush(struct rt6_info *rt) +static void rt6_multipath_flush(struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; rt->should_flush = 1; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) iter->should_flush = 1; } -static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, +static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, const struct net_device *down_dev) { - struct rt6_info *iter; + struct fib6_info *iter; unsigned int dead = 0; - if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_dev == down_dev || + rt->fib6_nh.nh_flags & RTNH_F_DEAD) dead++; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == down_dev || 
- iter->rt6i_nh_flags & RTNH_F_DEAD) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) + if (iter->fib6_nh.nh_dev == down_dev || + iter->fib6_nh.nh_flags & RTNH_F_DEAD) dead++; return dead; } -static void rt6_multipath_nh_flags_set(struct rt6_info *rt, +static void rt6_multipath_nh_flags_set(struct fib6_info *rt, const struct net_device *dev, unsigned int nh_flags) { - struct rt6_info *iter; + struct fib6_info *iter; - if (rt->dst.dev == dev) - rt->rt6i_nh_flags |= nh_flags; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == dev) - iter->rt6i_nh_flags |= nh_flags; + if (rt->fib6_nh.nh_dev == dev) + rt->fib6_nh.nh_flags |= nh_flags; + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) + if (iter->fib6_nh.nh_dev == dev) + iter->fib6_nh.nh_flags |= nh_flags; } /* called with write lock held for table with rt */ -static int fib6_ifdown(struct rt6_info *rt, void *p_arg) +static int fib6_ifdown(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; const struct net_device *dev = arg->dev; - const struct net *net = dev_net(dev); + struct net *net = dev_net(dev); - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) return 0; switch (arg->event) { case NETDEV_UNREGISTER: - return rt->dst.dev == dev ? -1 : 0; + return rt->fib6_nh.nh_dev == dev ? -1 : 0; case NETDEV_DOWN: if (rt->should_flush) return -1; - if (!rt->rt6i_nsiblings) - return rt->dst.dev == dev ? -1 : 0; + if (!rt->fib6_nsiblings) + return rt->fib6_nh.nh_dev == dev ? -1 : 0; if (rt6_multipath_uses_dev(rt, dev)) { unsigned int count; count = rt6_multipath_dead_count(rt, dev); - if (rt->rt6i_nsiblings + 1 == count) { + if (rt->fib6_nsiblings + 1 == count) { rt6_multipath_flush(rt); return -1; } rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); - fib6_update_sernum(rt); + fib6_update_sernum(net, rt); rt6_multipath_rebalance(rt); } return -2; case NETDEV_CHANGE: - if (rt->dst.dev != dev || - rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) + if (rt->fib6_nh.nh_dev != dev || + rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; - rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; rt6_multipath_rebalance(rt); break; } @@ -3936,7 +4011,7 @@ struct rt6_mtu_change_arg { unsigned int mtu; }; -static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) +static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; @@ -3956,12 +4031,15 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. 
jumbo frame) */ - if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->fib6_nh.nh_dev == arg->dev && + !fib6_metric_locked(rt, RTAX_MTU)) { + u32 mtu = rt->fib6_pmtu; + + if (mtu >= arg->mtu || + (mtu < arg->mtu && mtu == idev->cnf.mtu6)) + fib6_metric_set(rt, RTAX_MTU, arg->mtu); + spin_lock_bh(&rt6_exception_lock); - if (dst_metric_raw(&rt->dst, RTAX_MTU) && - rt6_mtu_change_route_allowed(idev, rt, arg->mtu)) - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); rt6_exceptions_update_pmtu(idev, rt, arg->mtu); spin_unlock_bh(&rt6_exception_lock); } @@ -4122,9 +4200,8 @@ errout: } struct rt6_nh { - struct rt6_info *rt6_info; + struct fib6_info *fib6_info; struct fib6_config r_cfg; - struct mx6_config mxc; struct list_head next; }; @@ -4139,23 +4216,25 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) } } -static int ip6_route_info_append(struct list_head *rt6_nh_list, - struct rt6_info *rt, struct fib6_config *r_cfg) +static int ip6_route_info_append(struct net *net, + struct list_head *rt6_nh_list, + struct fib6_info *rt, + struct fib6_config *r_cfg) { struct rt6_nh *nh; int err = -EEXIST; list_for_each_entry(nh, rt6_nh_list, next) { - /* check if rt6_info already exists */ - if (rt6_duplicate_nexthop(nh->rt6_info, rt)) + /* check if fib6_info already exists */ + if (rt6_duplicate_nexthop(nh->fib6_info, rt)) return err; } nh = kzalloc(sizeof(*nh), GFP_KERNEL); if (!nh) return -ENOMEM; - nh->rt6_info = rt; - err = ip6_convert_metrics(&nh->mxc, r_cfg); + nh->fib6_info = rt; + err = ip6_convert_metrics(net, rt, r_cfg); if (err) { kfree(nh); return err; @@ -4166,8 +4245,8 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list, return 0; } -static void ip6_route_mpath_notify(struct rt6_info *rt, - struct rt6_info *rt_last, +static void ip6_route_mpath_notify(struct fib6_info *rt, + struct fib6_info *rt_last, struct nl_info *info, __u16 nlflags) { @@ -4177,10 +4256,10 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, * nexthop. 
Since sibling routes are always added at the end of * the list, find the first sibling of the last route appended */ - if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { - rt = list_first_entry(&rt_last->rt6i_siblings, - struct rt6_info, - rt6i_siblings); + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { + rt = list_first_entry(&rt_last->fib6_siblings, + struct fib6_info, + fib6_siblings); } if (rt) @@ -4190,11 +4269,11 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { - struct rt6_info *rt_notif = NULL, *rt_last = NULL; + struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; struct fib6_config r_cfg; struct rtnexthop *rtnh; - struct rt6_info *rt; + struct fib6_info *rt; struct rt6_nh *err_nh; struct rt6_nh *nh, *nh_safe; __u16 nlflags; @@ -4214,7 +4293,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, rtnh = (struct rtnexthop *)cfg->fc_mp; /* Parse a Multipath Entry and build a list (rt6_nh_list) of - * rt6_info structs per nexthop + * fib6_info structs per nexthop */ while (rtnh_ok(rtnh, remaining)) { memcpy(&r_cfg, cfg, sizeof(*cfg)); @@ -4237,18 +4316,19 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, } r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); - rt = ip6_route_info_create(&r_cfg, extack); + rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto cleanup; } - rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; + rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1; - err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); + err = ip6_route_info_append(info->nl_net, &rt6_nh_list, + rt, &r_cfg); if (err) { - dst_release_immediate(&rt->dst); + fib6_info_release(rt); goto cleanup; } @@ -4263,14 +4343,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { - rt_last = nh->rt6_info; - err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); + rt_last = nh->fib6_info; + err = __ip6_ins_rt(nh->fib6_info, info, extack); + fib6_info_release(nh->fib6_info); + /* save reference to first route for notification */ if (!rt_notif && !err) - rt_notif = nh->rt6_info; + rt_notif = nh->fib6_info; - /* nh->rt6_info is used or freed at this point, reset to NULL*/ - nh->rt6_info = NULL; + /* nh->fib6_info is used or freed at this point, reset to NULL*/ + nh->fib6_info = NULL; if (err) { if (replace && nhn) ip6_print_replace_route_err(&rt6_nh_list); @@ -4311,9 +4393,8 @@ add_errout: cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->rt6_info) - dst_release_immediate(&nh->rt6_info->dst); - kfree(nh->mxc.mx); + if (nh->fib6_info) + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } @@ -4390,20 +4471,20 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (cfg.fc_mp) return ip6_route_multipath_add(&cfg, extack); else - return ip6_route_add(&cfg, extack); + return ip6_route_add(&cfg, GFP_KERNEL, extack); } -static size_t rt6_nlmsg_size(struct rt6_info *rt) +static size_t rt6_nlmsg_size(struct fib6_info *rt) { int nexthop_len = 0; - if (rt->rt6i_nsiblings) { + if (rt->fib6_nsiblings) { nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + lwtunnel_get_encap_size(rt->dst.lwtstate); + + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate); - 
nexthop_len *= rt->rt6i_nsiblings; + nexthop_len *= rt->fib6_nsiblings; } return NLMSG_ALIGN(sizeof(struct rtmsg)) @@ -4419,38 +4500,41 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->dst.lwtstate) + + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate) + nexthop_len; } -static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, +static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt, unsigned int *flags, bool skip_oif) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) *flags |= RTNH_F_DEAD; - if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) { + if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) { *flags |= RTNH_F_LINKDOWN; - if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) + + rcu_read_lock(); + if (fib6_ignore_linkdown(rt)) *flags |= RTNH_F_DEAD; + rcu_read_unlock(); } - if (rt->rt6i_flags & RTF_GATEWAY) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) + if (rt->fib6_flags & RTF_GATEWAY) { + if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0) goto nla_put_failure; } - *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); - if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) + *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK); + if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; /* not needed for multipath encoding b/c it has a rtnexthop struct */ - if (!skip_oif && rt->dst.dev && - nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) + if (!skip_oif && rt->fib6_nh.nh_dev && + nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex)) goto nla_put_failure; - if (rt->dst.lwtstate && - lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) + if (rt->fib6_nh.nh_lwtstate && + lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0) goto nla_put_failure; return 0; @@ -4460,8 +4544,9 @@ nla_put_failure: } /* add multipath next hop */ -static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) +static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt) { + const struct net_device *dev = rt->fib6_nh.nh_dev; struct rtnexthop *rtnh; unsigned int flags = 0; @@ -4469,8 +4554,8 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) if (!rtnh) goto nla_put_failure; - rtnh->rtnh_hops = rt->rt6i_nh_weight - 1; - rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; + rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1; + rtnh->rtnh_ifindex = dev ? 
dev->ifindex : 0; if (rt6_nexthop_info(skb, rt, &flags, true) < 0) goto nla_put_failure; @@ -4486,16 +4571,16 @@ nla_put_failure: return -EMSGSIZE; } -static int rt6_fill_node(struct net *net, - struct sk_buff *skb, struct rt6_info *rt, - struct in6_addr *dst, struct in6_addr *src, +static int rt6_fill_node(struct net *net, struct sk_buff *skb, + struct fib6_info *rt, struct dst_entry *dst, + struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags) { - u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; - long expires; + long expires = 0; + u32 *pmetrics; u32 table; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); @@ -4504,53 +4589,31 @@ static int rt6_fill_node(struct net *net, rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET6; - rtm->rtm_dst_len = rt->rt6i_dst.plen; - rtm->rtm_src_len = rt->rt6i_src.plen; + rtm->rtm_dst_len = rt->fib6_dst.plen; + rtm->rtm_src_len = rt->fib6_src.plen; rtm->rtm_tos = 0; - if (rt->rt6i_table) - table = rt->rt6i_table->tb6_id; + if (rt->fib6_table) + table = rt->fib6_table->tb6_id; else table = RT6_TABLE_UNSPEC; rtm->rtm_table = table; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; - if (rt->rt6i_flags & RTF_REJECT) { - switch (rt->dst.error) { - case -EINVAL: - rtm->rtm_type = RTN_BLACKHOLE; - break; - case -EACCES: - rtm->rtm_type = RTN_PROHIBIT; - break; - case -EAGAIN: - rtm->rtm_type = RTN_THROW; - break; - default: - rtm->rtm_type = RTN_UNREACHABLE; - break; - } - } - else if (rt->rt6i_flags & RTF_LOCAL) - rtm->rtm_type = RTN_LOCAL; - else if (rt->rt6i_flags & RTF_ANYCAST) - rtm->rtm_type = RTN_ANYCAST; - else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) - rtm->rtm_type = RTN_LOCAL; - else - rtm->rtm_type = RTN_UNICAST; + + rtm->rtm_type = rt->fib6_type; rtm->rtm_flags = 0; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - rtm->rtm_protocol = rt->rt6i_protocol; + rtm->rtm_protocol = rt->fib6_protocol; - if (rt->rt6i_flags & RTF_CACHE) + if (rt->fib6_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; - if (dst) { - if (nla_put_in6_addr(skb, RTA_DST, dst)) + if (dest) { + if (nla_put_in6_addr(skb, RTA_DST, dest)) goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) + if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr)) goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { @@ -4558,12 +4621,12 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len && - nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) + nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr)) goto nla_put_failure; #endif if (iif) { #ifdef CONFIG_IPV6_MROUTE - if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { + if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) { int err = ip6mr_get_route(net, skb, rtm, portid); if (err == 0) @@ -4574,34 +4637,32 @@ static int rt6_fill_node(struct net *net, #endif if (nla_put_u32(skb, RTA_IIF, iif)) goto nla_put_failure; - } else if (dst) { + } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } - if (rt->rt6i_prefsrc.plen) { + if (rt->fib6_prefsrc.plen) { struct in6_addr saddr_buf; - saddr_buf = rt->rt6i_prefsrc.addr; + saddr_buf = rt->fib6_prefsrc.addr; if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } - 
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); - if (rt->rt6i_pmtu) - metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; - if (rtnetlink_put_metrics(skb, metrics) < 0) + pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics; + if (rtnetlink_put_metrics(skb, pmetrics) < 0) goto nla_put_failure; - if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) + if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric)) goto nla_put_failure; /* For multipath routes, walk the siblings list and add * each as a nexthop within RTA_MULTIPATH. */ - if (rt->rt6i_nsiblings) { - struct rt6_info *sibling, *next_sibling; + if (rt->fib6_nsiblings) { + struct fib6_info *sibling, *next_sibling; struct nlattr *mp; mp = nla_nest_start(skb, RTA_MULTIPATH); @@ -4612,7 +4673,7 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, - &rt->rt6i_siblings, rt6i_siblings) { + &rt->fib6_siblings, fib6_siblings) { if (rt6_add_nexthop(skb, sibling) < 0) goto nla_put_failure; } @@ -4623,12 +4684,15 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; + if (rt->fib6_flags & RTF_EXPIRES) { + expires = dst ? dst->expires : rt->expires; + expires -= jiffies; + } - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) + if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0) goto nla_put_failure; - if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) + if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags))) goto nla_put_failure; @@ -4640,12 +4704,12 @@ nla_put_failure: return -EMSGSIZE; } -int rt6_dump_route(struct rt6_info *rt, void *p_arg) +int rt6_dump_route(struct fib6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; struct net *net = arg->net; - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) return 0; if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { @@ -4653,16 +4717,15 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) /* user wants prefix routes only */ if (rtm->rtm_flags & RTM_F_PREFIX && - !(rt->rt6i_flags & RTF_PREFIX_RT)) { + !(rt->fib6_flags & RTF_PREFIX_RT)) { /* success since this is not a prefix route */ return 1; } } - return rt6_fill_node(net, - arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, - NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, - NLM_F_MULTI); + return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, + RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, + arg->cb->nlh->nlmsg_seq, NLM_F_MULTI); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, @@ -4671,6 +4734,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; int err, iif = 0, oif = 0; + struct fib6_info *from; struct dst_entry *dst; struct rt6_info *rt; struct sk_buff *skb; @@ -4759,14 +4823,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; } - if (fibmatch && rt->from) { - struct rt6_info *ort = rt->from; - - dst_hold(&ort->dst); - ip6_rt_put(rt); - rt = ort; - } - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); @@ -4775,14 +4831,21 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, } skb_dst_set(skb, &rt->dst); + + rcu_read_lock(); + from = rcu_dereference(rt->from); + if (fibmatch) - err = rt6_fill_node(net, skb, rt, NULL, NULL, 
iif, + err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0); else - err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); + err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, + &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, + 0); + rcu_read_unlock(); + if (err < 0) { kfree_skb(skb); goto errout; @@ -4793,7 +4856,7 @@ errout: return err; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, +void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int nlm_flags) { struct sk_buff *skb; @@ -4808,8 +4871,8 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, if (!skb) goto errout; - err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->portid, seq, nlm_flags); + err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, + event, info->portid, seq, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -4834,6 +4897,7 @@ static int ip6_route_dev_notify(struct notifier_block *this, return NOTIFY_OK; if (event == NETDEV_REGISTER) { + net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev; net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -5030,11 +5094,17 @@ static int __net_init ip6_route_net_init(struct net *net) if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) goto out_ip6_dst_ops; + net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template, + sizeof(*net->ipv6.fib6_null_entry), + GFP_KERNEL); + if (!net->ipv6.fib6_null_entry) + goto out_ip6_dst_entries; + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, sizeof(*net->ipv6.ip6_null_entry), GFP_KERNEL); if (!net->ipv6.ip6_null_entry) - goto out_ip6_dst_entries; + goto out_fib6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_null_entry->dst, ip6_template_metrics, true); @@ -5081,6 +5151,8 @@ out_ip6_prohibit_entry: out_ip6_null_entry: kfree(net->ipv6.ip6_null_entry); #endif +out_fib6_null_entry: + kfree(net->ipv6.fib6_null_entry); out_ip6_dst_entries: dst_entries_destroy(&net->ipv6.ip6_dst_ops); out_ip6_dst_ops: @@ -5089,6 +5161,7 @@ out_ip6_dst_ops: static void __net_exit ip6_route_net_exit(struct net *net) { + kfree(net->ipv6.fib6_null_entry); kfree(net->ipv6.ip6_null_entry); #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.ip6_prohibit_entry); @@ -5159,6 +5232,7 @@ void __init ip6_route_init_special_entries(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ + init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 5fe139484919..eab39bd91548 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -91,6 +91,24 @@ static void set_tun_src(struct net *net, struct net_device *dev, rcu_read_unlock(); } +/* Compute flowlabel for outer IPv6 header */ +static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, + struct ipv6hdr *inner_hdr) +{ + int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel; + __be32 
flowlabel = 0;
+	u32 hash;
+
+	if (do_flowlabel > 0) {
+		hash = skb_get_hash(skb);
+		hash = rol32(hash, 16);
+		flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
+	} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
+		flowlabel = ip6_flowlabel(inner_hdr);
+	}
+	return flowlabel;
+}
+
 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
 int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 {
@@ -99,6 +117,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 	struct ipv6hdr *hdr, *inner_hdr;
 	struct ipv6_sr_hdr *isrh;
 	int hdrlen, tot_len, err;
+	__be32 flowlabel;
 
 	hdrlen = (osrh->hdrlen + 1) << 3;
 	tot_len = hdrlen + sizeof(*hdr);
@@ -108,6 +127,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 		return err;
 
 	inner_hdr = ipv6_hdr(skb);
+	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
 
 	skb_push(skb, tot_len);
 	skb_reset_network_header(skb);
@@ -121,10 +141,10 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 	if (skb->protocol == htons(ETH_P_IPV6)) {
 		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
-			     ip6_flowlabel(inner_hdr));
+			     flowlabel);
 		hdr->hop_limit = inner_hdr->hop_limit;
 	} else {
-		ip6_flow_hdr(hdr, 0, 0);
+		ip6_flow_hdr(hdr, 0, flowlabel);
 		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
 	}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 6fbdef630152..e15cd37024fd 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -152,6 +152,13 @@ static struct ctl_table ipv6_table_template[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "seg6_flowlabel",
+		.data		= &init_net.ipv6.sysctl.seg6_flowlabel,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{ }
 };
@@ -217,6 +224,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
 	ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
 	ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
+	ipv6_table[15].data = &net->ipv6.sysctl.seg6_flowlabel;
 
 	ipv6_route_table = ipv6_route_sysctl_init(net);
 	if (!ipv6_route_table)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4ec76a87aeb8..a34e28ac03a7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1023,7 +1023,8 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
  * Sending
  */
 
-static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
+static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+			   struct inet_cork *cork)
 {
 	struct sock *sk = skb->sk;
 	struct udphdr *uh;
@@ -1042,12 +1043,31 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
 	uh->len = htons(len);
 	uh->check = 0;
 
+	if (cork->gso_size) {
+		const int hlen = skb_network_header_len(skb) +
+				 sizeof(struct udphdr);
+
+		if (hlen + cork->gso_size > cork->fragsize)
+			return -EINVAL;
+		if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+			return -EINVAL;
+		if (udp_sk(sk)->no_check6_tx)
+			return -EINVAL;
+		if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+			return -EIO;
+
+		skb_shinfo(skb)->gso_size = cork->gso_size;
+		skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+		goto csum_partial;
+	}
+
 	if (is_udplite)
 		csum = udplite_csum(skb);
 	else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+csum_partial:
 		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr,
&fl6->daddr, len); goto send; } else @@ -1093,7 +1113,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_v6_send_skb(skb, &fl6); + err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base); out: up->len = 0; @@ -1127,6 +1147,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = -1; ipc6.tclass = -1; ipc6.dontfrag = -1; + ipc6.gso_size = up->gso_size; sockc.tsflags = sk->sk_tsflags; /* destination address check */ @@ -1259,7 +1280,10 @@ do_udp_sendmsg: opt->tot_len = sizeof(*opt); ipc6.opt = opt; - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc); + err = udp_cmsg_send(sk, msg, &ipc6.gso_size); + if (err > 0) + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, + &ipc6, &sockc); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1324,15 +1348,16 @@ back_from_confirm: /* Lockless fast path for the non-corking case */ if (!corkreq) { + struct inet_cork_full cork; struct sk_buff *skb; skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, &fl6, (struct rt6_info *)dst, - msg->msg_flags, &sockc); + msg->msg_flags, &cork, &sockc); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_v6_send_skb(skb, &fl6); + err = udp_v6_send_skb(skb, &fl6, &cork.base); goto out; } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 2a04dc9c781b..f7b85b1e6b3e 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -17,6 +17,20 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" +static struct sk_buff *__udp6_gso_segment(struct sk_buff *gso_skb, + netdev_features_t features) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(gso_skb); + unsigned int mss = skb_shinfo(gso_skb)->gso_size; + + if (!can_checksum_protocol(features, htons(ETH_P_IPV6))) + return ERR_PTR(-EIO); + + return __udp_gso_segment(gso_skb, features, mss, + udp_v6_check(sizeof(struct udphdr) + mss, + &ip6h->saddr, &ip6h->daddr, 0)); +} + static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { @@ -42,12 +56,15 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, const struct ipv6hdr *ipv6h; struct udphdr *uh; - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4))) goto out; if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + return __udp6_gso_segment(skb, features); + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot * do checksum of UDP packets sent as multiple IP fragments. 
*/ diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 416fe67271a9..2cff209d0fc1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -107,8 +107,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); - xdst->u.rt6.rt6i_metric = rt->rt6i_metric; - xdst->u.rt6.rt6i_node = rt->rt6i_node; xdst->route_cookie = rt6_get_cookie(rt); xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 7f1e842ef05a..e87686f7d63c 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -57,6 +57,10 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { + /* Drop reference taken during previous invocation */ + if (pd->session) + l2tp_session_dec_refcount(pd->session); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx); pd->session_idx++; @@ -105,11 +109,16 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v) if (!pd || pd == SEQ_START_TOKEN) return; - /* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */ + /* Drop reference taken by last invocation of l2tp_dfs_next_session() + * or l2tp_dfs_next_tunnel(). + */ + if (pd->session) { + l2tp_session_dec_refcount(pd->session); + pd->session = NULL; + } if (pd->tunnel) { l2tp_tunnel_dec_refcount(pd->tunnel); pd->tunnel = NULL; - pd->session = NULL; } } @@ -250,13 +259,10 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) goto out; } - /* Show the tunnel or session context */ - if (!pd->session) { + if (!pd->session) l2tp_dfs_seq_tunnel_show(m, pd->tunnel); - } else { + else l2tp_dfs_seq_session_show(m, pd->session); - l2tp_session_dec_refcount(pd->session); - } out: return 0; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 1fd9e145076a..f951c768dcf2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1576,6 +1576,10 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { + /* Drop reference taken during previous invocation */ + if (pd->session) + l2tp_session_dec_refcount(pd->session); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx); pd->session_idx++; @@ -1624,11 +1628,16 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v) if (!pd || pd == SEQ_START_TOKEN) return; - /* Drop reference taken by last invocation of pppol2tp_next_tunnel() */ + /* Drop reference taken by last invocation of pppol2tp_next_session() + * or pppol2tp_next_tunnel(). + */ + if (pd->session) { + l2tp_session_dec_refcount(pd->session); + pd->session = NULL; + } if (pd->tunnel) { l2tp_tunnel_dec_refcount(pd->tunnel); pd->tunnel = NULL; - pd->session = NULL; } } @@ -1723,14 +1732,10 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) goto out; } - /* Show the tunnel or session context. 
- */ - if (!pd->session) { + if (!pd->session) pppol2tp_seq_tunnel_show(m, pd->tunnel); - } else { + else pppol2tp_seq_session_show(m, pd->session); - l2tp_session_dec_refcount(pd->session); - } out: return 0; diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 8da84312cd3b..8055e3965cef 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -68,15 +68,6 @@ enum { NCSI_MODE_MAX }; -enum { - NCSI_FILTER_BASE = 0, - NCSI_FILTER_VLAN = 0, - NCSI_FILTER_UC, - NCSI_FILTER_MC, - NCSI_FILTER_MIXED, - NCSI_FILTER_MAX -}; - struct ncsi_channel_version { u32 version; /* Supported BCD encoded NCSI version */ u32 alpha2; /* Supported BCD encoded NCSI version */ @@ -98,11 +89,18 @@ struct ncsi_channel_mode { u32 data[8]; /* Data entries */ }; -struct ncsi_channel_filter { - u32 index; /* Index of channel filters */ - u32 total; /* Total entries in the filter table */ - u64 bitmap; /* Bitmap of valid entries */ - u32 data[]; /* Data for the valid entries */ +struct ncsi_channel_mac_filter { + u8 n_uc; + u8 n_mc; + u8 n_mixed; + u64 bitmap; + unsigned char *addrs; +}; + +struct ncsi_channel_vlan_filter { + u8 n_vids; + u64 bitmap; + u16 *vids; }; struct ncsi_channel_stats { @@ -186,7 +184,9 @@ struct ncsi_channel { struct ncsi_channel_version version; struct ncsi_channel_cap caps[NCSI_CAP_MAX]; struct ncsi_channel_mode modes[NCSI_MODE_MAX]; - struct ncsi_channel_filter *filters[NCSI_FILTER_MAX]; + /* Filtering Settings */ + struct ncsi_channel_mac_filter mac_filter; + struct ncsi_channel_vlan_filter vlan_filter; struct ncsi_channel_stats stats; struct { struct timer_list timer; @@ -320,10 +320,6 @@ extern spinlock_t ncsi_dev_lock; list_for_each_entry_rcu(nc, &np->channels, node) /* Resources */ -u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index); -int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data); -int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data); -int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index); void ncsi_start_channel_monitor(struct ncsi_channel *nc); void ncsi_stop_channel_monitor(struct ncsi_channel *nc); struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index c3695ba0cf94..5561e221b71f 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -27,125 +27,6 @@ LIST_HEAD(ncsi_dev_list); DEFINE_SPINLOCK(ncsi_dev_lock); -static inline int ncsi_filter_size(int table) -{ - int sizes[] = { 2, 6, 6, 6 }; - - BUILD_BUG_ON(ARRAY_SIZE(sizes) != NCSI_FILTER_MAX); - if (table < NCSI_FILTER_BASE || table >= NCSI_FILTER_MAX) - return -EINVAL; - - return sizes[table]; -} - -u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index) -{ - struct ncsi_channel_filter *ncf; - int size; - - ncf = nc->filters[table]; - if (!ncf) - return NULL; - - size = ncsi_filter_size(table); - if (size < 0) - return NULL; - - return ncf->data + size * index; -} - -/* Find the first active filter in a filter table that matches the given - * data parameter. If data is NULL, this returns the first active filter. 
- */ -int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data) -{ - struct ncsi_channel_filter *ncf; - void *bitmap; - int index, size; - unsigned long flags; - - ncf = nc->filters[table]; - if (!ncf) - return -ENXIO; - - size = ncsi_filter_size(table); - if (size < 0) - return size; - - spin_lock_irqsave(&nc->lock, flags); - bitmap = (void *)&ncf->bitmap; - index = -1; - while ((index = find_next_bit(bitmap, ncf->total, index + 1)) - < ncf->total) { - if (!data || !memcmp(ncf->data + size * index, data, size)) { - spin_unlock_irqrestore(&nc->lock, flags); - return index; - } - } - spin_unlock_irqrestore(&nc->lock, flags); - - return -ENOENT; -} - -int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data) -{ - struct ncsi_channel_filter *ncf; - int index, size; - void *bitmap; - unsigned long flags; - - size = ncsi_filter_size(table); - if (size < 0) - return size; - - index = ncsi_find_filter(nc, table, data); - if (index >= 0) - return index; - - ncf = nc->filters[table]; - if (!ncf) - return -ENODEV; - - spin_lock_irqsave(&nc->lock, flags); - bitmap = (void *)&ncf->bitmap; - do { - index = find_next_zero_bit(bitmap, ncf->total, 0); - if (index >= ncf->total) { - spin_unlock_irqrestore(&nc->lock, flags); - return -ENOSPC; - } - } while (test_and_set_bit(index, bitmap)); - - memcpy(ncf->data + size * index, data, size); - spin_unlock_irqrestore(&nc->lock, flags); - - return index; -} - -int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index) -{ - struct ncsi_channel_filter *ncf; - int size; - void *bitmap; - unsigned long flags; - - size = ncsi_filter_size(table); - if (size < 0) - return size; - - ncf = nc->filters[table]; - if (!ncf || index >= ncf->total) - return -ENODEV; - - spin_lock_irqsave(&nc->lock, flags); - bitmap = (void *)&ncf->bitmap; - if (test_and_clear_bit(index, bitmap)) - memset(ncf->data + size * index, 0, size); - spin_unlock_irqrestore(&nc->lock, flags); - - return 0; -} - static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) { struct ncsi_dev *nd = &ndp->ndev; @@ -339,20 +220,13 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id) static void ncsi_remove_channel(struct ncsi_channel *nc) { struct ncsi_package *np = nc->package; - struct ncsi_channel_filter *ncf; unsigned long flags; - int i; - /* Release filters */ spin_lock_irqsave(&nc->lock, flags); - for (i = 0; i < NCSI_FILTER_MAX; i++) { - ncf = nc->filters[i]; - if (!ncf) - continue; - nc->filters[i] = NULL; - kfree(ncf); - } + /* Release filters */ + kfree(nc->mac_filter.addrs); + kfree(nc->vlan_filter.vids); nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); @@ -670,32 +544,26 @@ error: static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, struct ncsi_cmd_arg *nca) { + struct ncsi_channel_vlan_filter *ncf; + unsigned long flags; + void *bitmap; int index; - u32 *data; u16 vid; - index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, NULL); - if (index < 0) { - /* Filter table empty */ - return -1; - } + ncf = &nc->vlan_filter; + bitmap = &ncf->bitmap; - data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index); - if (!data) { - netdev_err(ndp->ndev.dev, - "NCSI: failed to retrieve filter %d\n", index); - /* Set the VLAN id to 0 - this will still disable the entry in - * the filter table, but we won't know what it was. 
- */ - vid = 0; - } else { - vid = *(u16 *)data; + spin_lock_irqsave(&nc->lock, flags); + index = find_next_bit(bitmap, ncf->n_vids, 0); + if (index >= ncf->n_vids) { + spin_unlock_irqrestore(&nc->lock, flags); + return -1; } + vid = ncf->vids[index]; - netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "NCSI: removed vlan tag %u at index %d\n", - vid, index + 1); - ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index); + clear_bit(index, bitmap); + ncf->vids[index] = 0; + spin_unlock_irqrestore(&nc->lock, flags); nca->type = NCSI_PKT_CMD_SVF; nca->words[1] = vid; @@ -711,45 +579,55 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, struct ncsi_cmd_arg *nca) { + struct ncsi_channel_vlan_filter *ncf; struct vlan_vid *vlan = NULL; - int index = 0; + unsigned long flags; + int i, index; + void *bitmap; + u16 vid; + if (list_empty(&ndp->vlan_vids)) + return -1; + + ncf = &nc->vlan_filter; + bitmap = &ncf->bitmap; + + spin_lock_irqsave(&nc->lock, flags); + + rcu_read_lock(); list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { - index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan->vid); - if (index < 0) { - /* New tag to add */ - netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "NCSI: new vlan id to set: %u\n", - vlan->vid); + vid = vlan->vid; + for (i = 0; i < ncf->n_vids; i++) + if (ncf->vids[i] == vid) { + vid = 0; + break; + } + if (vid) break; - } - netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "vid %u already at filter pos %d\n", - vlan->vid, index); } + rcu_read_unlock(); - if (!vlan || index >= 0) { - netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "no vlan ids left to set\n"); + if (!vid) { + /* No new VLAN ID to set */ + spin_unlock_irqrestore(&nc->lock, flags); return -1; } - index = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan->vid); - if (index < 0) { + index = find_next_zero_bit(bitmap, ncf->n_vids, 0); + if (index < 0 || index >= ncf->n_vids) { netdev_err(ndp->ndev.dev, - "Failed to add new VLAN tag, error %d\n", index); - if (index == -ENOSPC) - netdev_err(ndp->ndev.dev, - "Channel %u already has all VLAN filters set\n", - nc->id); + "Channel %u already has all VLAN filters set\n", + nc->id); + spin_unlock_irqrestore(&nc->lock, flags); return -1; } - netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "NCSI: set vid %u in packet, index %u\n", - vlan->vid, index + 1); + ncf->vids[index] = vid; + set_bit(index, bitmap); + spin_unlock_irqrestore(&nc->lock, flags); + nca->type = NCSI_PKT_CMD_SVF; - nca->words[1] = vlan->vid; + nca->words[1] = vid; /* HW filter index starts at 1 */ nca->bytes[6] = index + 1; nca->bytes[7] = 0x01; diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 8d7e849d4825..b09ef77bf4cd 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -58,10 +58,9 @@ static int ncsi_write_channel_info(struct sk_buff *skb, struct ncsi_dev_priv *ndp, struct ncsi_channel *nc) { - struct nlattr *vid_nest; - struct ncsi_channel_filter *ncf; + struct ncsi_channel_vlan_filter *ncf; struct ncsi_channel_mode *m; - u32 *data; + struct nlattr *vid_nest; int i; nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id); @@ -79,18 +78,13 @@ static int ncsi_write_channel_info(struct sk_buff *skb, vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); if (!vid_nest) return -ENOMEM; - ncf = nc->filters[NCSI_FILTER_VLAN]; + ncf = &nc->vlan_filter; i = -1; - if (ncf) { - while ((i = find_next_bit((void *)&ncf->bitmap, ncf->total, - i + 1)) < ncf->total) { - data = ncsi_get_filter(nc,
NCSI_FILTER_VLAN, i); - /* Uninitialised channels will have 'zero' vlan ids */ - if (!data || !*data) - continue; + while ((i = find_next_bit((void *)&ncf->bitmap, ncf->n_vids, + i + 1)) < ncf->n_vids) { + if (ncf->vids[i]) nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID, - *(u16 *)data); - } + ncf->vids[i]); } nla_nest_end(skb, vid_nest); diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index efd933ff5570..ce9497966ebe 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -334,9 +334,9 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr) struct ncsi_rsp_pkt *rsp; struct ncsi_dev_priv *ndp = nr->ndp; struct ncsi_channel *nc; - struct ncsi_channel_filter *ncf; - unsigned short vlan; - int ret; + struct ncsi_channel_vlan_filter *ncf; + unsigned long flags; + void *bitmap; /* Find the package and channel */ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); @@ -346,22 +346,23 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr) return -ENODEV; cmd = (struct ncsi_cmd_svf_pkt *)skb_network_header(nr->cmd); - ncf = nc->filters[NCSI_FILTER_VLAN]; - if (!ncf) - return -ENOENT; - if (cmd->index >= ncf->total) + ncf = &nc->vlan_filter; + if (cmd->index > ncf->n_vids) return -ERANGE; - /* Add or remove the VLAN filter */ + /* Add or remove the VLAN filter. Remember HW indexes from 1 */ + spin_lock_irqsave(&nc->lock, flags); + bitmap = &ncf->bitmap; if (!(cmd->enable & 0x1)) { - /* HW indexes from 1 */ - ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index - 1); + if (test_and_clear_bit(cmd->index - 1, bitmap)) + ncf->vids[cmd->index - 1] = 0; } else { - vlan = ntohs(cmd->vlan); - ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan); + set_bit(cmd->index - 1, bitmap); + ncf->vids[cmd->index - 1] = ntohs(cmd->vlan); } + spin_unlock_irqrestore(&nc->lock, flags); - return ret; + return 0; } static int ncsi_rsp_handler_ev(struct ncsi_request *nr) @@ -422,8 +423,12 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr) struct ncsi_rsp_pkt *rsp; struct ncsi_dev_priv *ndp = nr->ndp; struct ncsi_channel *nc; - struct ncsi_channel_filter *ncf; + struct ncsi_channel_mac_filter *ncf; + unsigned long flags; void *bitmap; + bool enabled; + int index; + /* Find the package and channel */ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); @@ -436,31 +441,23 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr) * isn't supported yet. 
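The SMA handler that continues below no longer picks one of three per-type tables; unicast, multicast and mixed entries now live back-to-back in a single byte array addressed by the 1-based hardware index. A stand-alone sketch of that addressing with the same bounds check (names hypothetical):

#include <stdint.h>
#include <string.h>

#define ETH_ALEN 6

struct mac_filter {                     /* shape of ncsi_channel_mac_filter */
	uint8_t   n_uc, n_mc, n_mixed;
	uint64_t  bitmap;
	uint8_t  *addrs;                /* (n_uc + n_mc + n_mixed) * ETH_ALEN */
};

/* Store mac at 1-based hardware slot hw_index; -1 on a range error. */
static int store_mac(struct mac_filter *f, int hw_index, const uint8_t *mac)
{
	if (hw_index < 1 || hw_index > f->n_uc + f->n_mc + f->n_mixed)
		return -1;
	f->bitmap |= 1ULL << (hw_index - 1);
	memcpy(&f->addrs[(hw_index - 1) * ETH_ALEN], mac, ETH_ALEN);
	return 0;
}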
*/ cmd = (struct ncsi_cmd_sma_pkt *)skb_network_header(nr->cmd); - switch (cmd->at_e >> 5) { - case 0x0: /* UC address */ - ncf = nc->filters[NCSI_FILTER_UC]; - break; - case 0x1: /* MC address */ - ncf = nc->filters[NCSI_FILTER_MC]; - break; - default: - return -EINVAL; - } + enabled = cmd->at_e & 0x1; + ncf = &nc->mac_filter; + bitmap = &ncf->bitmap; - /* Sanity check on the filter */ - if (!ncf) - return -ENOENT; - else if (cmd->index >= ncf->total) + if (cmd->index > ncf->n_uc + ncf->n_mc + ncf->n_mixed) return -ERANGE; - bitmap = &ncf->bitmap; - if (cmd->at_e & 0x1) { - set_bit(cmd->index, bitmap); - memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6); + index = (cmd->index - 1) * ETH_ALEN; + spin_lock_irqsave(&nc->lock, flags); + if (enabled) { + set_bit(cmd->index - 1, bitmap); + memcpy(&ncf->addrs[index], cmd->mac, ETH_ALEN); } else { - clear_bit(cmd->index, bitmap); - memset(ncf->data + 6 * cmd->index, 0, 6); + clear_bit(cmd->index - 1, bitmap); + memset(&ncf->addrs[index], 0, ETH_ALEN); } + spin_unlock_irqrestore(&nc->lock, flags); return 0; } @@ -631,9 +628,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr) struct ncsi_rsp_gc_pkt *rsp; struct ncsi_dev_priv *ndp = nr->ndp; struct ncsi_channel *nc; - struct ncsi_channel_filter *ncf; - size_t size, entry_size; - int cnt, i; + size_t size; /* Find the channel */ rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp); @@ -655,64 +650,40 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr) nc->caps[NCSI_CAP_VLAN].cap = rsp->vlan_mode & NCSI_CAP_VLAN_MASK; - /* Build filters */ - for (i = 0; i < NCSI_FILTER_MAX; i++) { - switch (i) { - case NCSI_FILTER_VLAN: - cnt = rsp->vlan_cnt; - entry_size = 2; - break; - case NCSI_FILTER_MIXED: - cnt = rsp->mixed_cnt; - entry_size = 6; - break; - case NCSI_FILTER_MC: - cnt = rsp->mc_cnt; - entry_size = 6; - break; - case NCSI_FILTER_UC: - cnt = rsp->uc_cnt; - entry_size = 6; - break; - default: - continue; - } - - if (!cnt || nc->filters[i]) - continue; - - size = sizeof(*ncf) + cnt * entry_size; - ncf = kzalloc(size, GFP_ATOMIC); - if (!ncf) { - pr_warn("%s: Cannot alloc filter table (%d)\n", - __func__, i); - return -ENOMEM; - } - - ncf->index = i; - ncf->total = cnt; - if (i == NCSI_FILTER_VLAN) { - /* Set VLAN filters active so they are cleared in - * first configuration state - */ - ncf->bitmap = U64_MAX; - } else { - ncf->bitmap = 0x0ul; - } - nc->filters[i] = ncf; - } + size = (rsp->uc_cnt + rsp->mc_cnt + rsp->mixed_cnt) * ETH_ALEN; + nc->mac_filter.addrs = kzalloc(size, GFP_KERNEL); + if (!nc->mac_filter.addrs) + return -ENOMEM; + nc->mac_filter.n_uc = rsp->uc_cnt; + nc->mac_filter.n_mc = rsp->mc_cnt; + nc->mac_filter.n_mixed = rsp->mixed_cnt; + + nc->vlan_filter.vids = kcalloc(rsp->vlan_cnt, + sizeof(*nc->vlan_filter.vids), + GFP_KERNEL); + if (!nc->vlan_filter.vids) + return -ENOMEM; + /* Set VLAN filters active so they are cleared in the first + * configuration state + */ + nc->vlan_filter.bitmap = U64_MAX; + nc->vlan_filter.n_vids = rsp->vlan_cnt; return 0; } static int ncsi_rsp_handler_gp(struct ncsi_request *nr) { - struct ncsi_rsp_gp_pkt *rsp; + struct ncsi_channel_vlan_filter *ncvf; + struct ncsi_channel_mac_filter *ncmf; struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_rsp_gp_pkt *rsp; struct ncsi_channel *nc; - unsigned short enable, vlan; + unsigned short enable; unsigned char *pdata; - int table, i; + unsigned long flags; + void *bitmap; + int i; /* Find the channel */ rsp = (struct ncsi_rsp_gp_pkt *)skb_network_header(nr->rsp); @@ -746,36 +717,33 @@ static 
int ncsi_rsp_handler_gp(struct ncsi_request *nr) /* MAC addresses filter table */ pdata = (unsigned char *)rsp + 48; enable = rsp->mac_enable; + ncmf = &nc->mac_filter; + spin_lock_irqsave(&nc->lock, flags); + bitmap = &ncmf->bitmap; for (i = 0; i < rsp->mac_cnt; i++, pdata += 6) { - if (i >= (nc->filters[NCSI_FILTER_UC]->total + - nc->filters[NCSI_FILTER_MC]->total)) - table = NCSI_FILTER_MIXED; - else if (i >= nc->filters[NCSI_FILTER_UC]->total) - table = NCSI_FILTER_MC; - else - table = NCSI_FILTER_UC; - if (!(enable & (0x1 << i))) - continue; - - if (ncsi_find_filter(nc, table, pdata) >= 0) - continue; + clear_bit(i, bitmap); + else + set_bit(i, bitmap); - ncsi_add_filter(nc, table, pdata); + memcpy(&ncmf->addrs[i * ETH_ALEN], pdata, ETH_ALEN); } + spin_unlock_irqrestore(&nc->lock, flags); /* VLAN filter table */ enable = ntohs(rsp->vlan_enable); + ncvf = &nc->vlan_filter; + bitmap = &ncvf->bitmap; + spin_lock_irqsave(&nc->lock, flags); for (i = 0; i < rsp->vlan_cnt; i++, pdata += 2) { if (!(enable & (0x1 << i))) - continue; - - vlan = ntohs(*(__be16 *)pdata); - if (ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan) >= 0) - continue; + clear_bit(i, bitmap); + else + set_bit(i, bitmap); - ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan); + ncvf->vids[i] = ntohs(*(__be16 *)pdata); } + spin_unlock_irqrestore(&nc->lock, flags); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 4527921b1c3a..ba0a0fd045c8 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -266,12 +266,13 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, /* check and decrement ttl */ if (ipv6_hdr(skb)->hop_limit <= 1) { + struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); + /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - __IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INHDRERRORS); + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); return false; } diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig index 326fd97444f5..1944834d225c 100644 --- a/net/qrtr/Kconfig +++ b/net/qrtr/Kconfig @@ -21,4 +21,11 @@ config QRTR_SMD Say Y here to support SMD based ipcrouter channels. SMD is the most common transport for IPC Router. +config QRTR_TUN + tristate "TUN device for Qualcomm IPC Router" + ---help--- + Say Y here to expose a character device that allows user space to + implement endpoints of QRTR, for the purpose of tunneling data to + other hosts, or for testing.
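The character device implemented in the new tun.c below registers as a dynamic misc device named "qrtr-tun". A minimal user-space sketch of the intended usage, assuming the node appears as /dev/qrtr-tun and that a well-formed QRTR packet framing is handled elsewhere:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open("/dev/qrtr-tun", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	/* Block until the router queues a packet for this endpoint. */
	if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLIN)) {
		ssize_t n = read(fd, buf, sizeof(buf));
		if (n > 0)
			printf("got %zd byte QRTR packet\n", n);
	}

	close(fd);
	return 0;
}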
+ endif # QRTR diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile index ab09e40f7c74..be012bfd3e52 100644 --- a/net/qrtr/Makefile +++ b/net/qrtr/Makefile @@ -2,3 +2,5 @@ obj-$(CONFIG_QRTR) := qrtr.o obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o qrtr-smd-y := smd.o +obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o +qrtr-tun-y := tun.o diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c new file mode 100644 index 000000000000..ccff1e544c21 --- /dev/null +++ b/net/qrtr/tun.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Linaro Ltd */ + +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/skbuff.h> +#include <linux/uaccess.h> + +#include "qrtr.h" + +struct qrtr_tun { + struct qrtr_endpoint ep; + + struct sk_buff_head queue; + wait_queue_head_t readq; +}; + +static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb) +{ + struct qrtr_tun *tun = container_of(ep, struct qrtr_tun, ep); + + skb_queue_tail(&tun->queue, skb); + + /* wake up any processes blocked waiting for new data */ + wake_up_interruptible(&tun->readq); + + return 0; +} + +static int qrtr_tun_open(struct inode *inode, struct file *filp) +{ + struct qrtr_tun *tun; + + tun = kzalloc(sizeof(*tun), GFP_KERNEL); + if (!tun) + return -ENOMEM; + + skb_queue_head_init(&tun->queue); + init_waitqueue_head(&tun->readq); + + tun->ep.xmit = qrtr_tun_send; + + filp->private_data = tun; + + return qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO); +} + +static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *filp = iocb->ki_filp; + struct qrtr_tun *tun = filp->private_data; + struct sk_buff *skb; + int count; + + while (!(skb = skb_dequeue(&tun->queue))) { + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + /* Wait until we get data or the endpoint goes away */ + if (wait_event_interruptible(tun->readq, + !skb_queue_empty(&tun->queue))) + return -ERESTARTSYS; + } + + count = min_t(size_t, iov_iter_count(to), skb->len); + if (copy_to_iter(skb->data, count, to) != count) + count = -EFAULT; + + kfree_skb(skb); + + return count; +} + +static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *filp = iocb->ki_filp; + struct qrtr_tun *tun = filp->private_data; + size_t len = iov_iter_count(from); + ssize_t ret; + void *kbuf; + + kbuf = kzalloc(len, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + if (!copy_from_iter_full(kbuf, len, from)) { + kfree(kbuf); + return -EFAULT; + } + + ret = qrtr_endpoint_post(&tun->ep, kbuf, len); + + kfree(kbuf); + return ret < 0 ? 
ret : len; +} + +static __poll_t qrtr_tun_poll(struct file *filp, poll_table *wait) +{ + struct qrtr_tun *tun = filp->private_data; + __poll_t mask = 0; + + poll_wait(filp, &tun->readq, wait); + + if (!skb_queue_empty(&tun->queue)) + mask |= EPOLLIN | EPOLLRDNORM; + + return mask; +} + +static int qrtr_tun_release(struct inode *inode, struct file *filp) +{ + struct qrtr_tun *tun = filp->private_data; + struct sk_buff *skb; + + qrtr_endpoint_unregister(&tun->ep); + + /* Discard all SKBs */ + while (!skb_queue_empty(&tun->queue)) { + skb = skb_dequeue(&tun->queue); + kfree_skb(skb); + } + + kfree(tun); + + return 0; +} + +static const struct file_operations qrtr_tun_ops = { + .owner = THIS_MODULE, + .open = qrtr_tun_open, + .poll = qrtr_tun_poll, + .read_iter = qrtr_tun_read_iter, + .write_iter = qrtr_tun_write_iter, + .release = qrtr_tun_release, +}; + +static struct miscdevice qrtr_tun_miscdev = { + MISC_DYNAMIC_MINOR, + "qrtr-tun", + &qrtr_tun_ops, +}; + +static int __init qrtr_tun_init(void) +{ + int ret; + + ret = misc_register(&qrtr_tun_miscdev); + if (ret) + pr_err("failed to register Qualcomm IPC Router tun device\n"); + + return ret; +} + +static void __exit qrtr_tun_exit(void) +{ + misc_deregister(&qrtr_tun_miscdev); +} + +module_init(qrtr_tun_init); +module_exit(qrtr_tun_exit); + +MODULE_DESCRIPTION("Qualcomm IPC Router TUN device"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 7e28b2ce1437..526a8e491626 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -648,6 +648,11 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } +static size_t tcf_csum_get_fill_size(const struct tc_action *act) +{ + return nla_total_size(sizeof(struct tc_csum)); +} + static struct tc_action_ops act_csum_ops = { .kind = "csum", .type = TCA_ACT_CSUM, @@ -658,6 +663,7 @@ static struct tc_action_ops act_csum_ops = { .cleanup = tcf_csum_cleanup, .walk = tcf_csum_walker, .lookup = tcf_csum_search, + .get_fill_size = tcf_csum_get_fill_size, .size = sizeof(struct tcf_csum), }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d964e60c730e..eacaaf803914 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -61,16 +61,18 @@ struct fl_flow_mask_range { struct fl_flow_mask { struct fl_flow_key key; struct fl_flow_mask_range range; - struct rcu_head rcu; + struct rhash_head ht_node; + struct rhashtable ht; + struct rhashtable_params filter_ht_params; + struct flow_dissector dissector; + struct list_head filters; + struct rcu_head rcu; + struct list_head list; }; struct cls_fl_head { struct rhashtable ht; - struct fl_flow_mask mask; - struct flow_dissector dissector; - bool mask_assigned; - struct list_head filters; - struct rhashtable_params ht_params; + struct list_head masks; union { struct work_struct work; struct rcu_head rcu; @@ -79,6 +81,7 @@ struct cls_fl_head { }; struct cls_fl_filter { + struct fl_flow_mask *mask; struct rhash_head ht_node; struct fl_flow_key mkey; struct tcf_exts exts; @@ -94,6 +97,13 @@ struct cls_fl_filter { struct net_device *hw_dev; }; +static const struct rhashtable_params mask_ht_params = { + .key_offset = offsetof(struct fl_flow_mask, key), + .key_len = sizeof(struct fl_flow_key), + .head_offset = offsetof(struct fl_flow_mask, ht_node), + .automatic_shrinking = true, +}; + static unsigned short int fl_mask_range(const struct fl_flow_mask *mask) { return mask->range.end - mask->range.start; @@ -103,13 +113,19 @@ static void 
fl_mask_update_range(struct fl_flow_mask *mask) { const u8 *bytes = (const u8 *) &mask->key; size_t size = sizeof(mask->key); - size_t i, first = 0, last = size - 1; + size_t i, first = 0, last; - for (i = 0; i < sizeof(mask->key); i++) { + for (i = 0; i < size; i++) { + if (bytes[i]) { + first = i; + break; + } + } + last = first; + for (i = size - 1; i != first; i--) { if (bytes[i]) { - if (!first && i) - first = i; last = i; + break; } } mask->range.start = rounddown(first, sizeof(long)); @@ -140,12 +156,11 @@ static void fl_clear_masked_range(struct fl_flow_key *key, memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); } -static struct cls_fl_filter *fl_lookup(struct cls_fl_head *head, +static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask, struct fl_flow_key *mkey) { - return rhashtable_lookup_fast(&head->ht, - fl_key_get_start(mkey, &head->mask), - head->ht_params); + return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask), + mask->filter_ht_params); } static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -153,28 +168,28 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_fl_head *head = rcu_dereference_bh(tp->root); struct cls_fl_filter *f; + struct fl_flow_mask *mask; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; - if (!atomic_read(&head->ht.nelems)) - return -1; - - fl_clear_masked_range(&skb_key, &head->mask); + list_for_each_entry_rcu(mask, &head->masks, list) { + fl_clear_masked_range(&skb_key, mask); - skb_key.indev_ifindex = skb->skb_iif; - /* skb_flow_dissect() does not set n_proto in case an unknown protocol, - * so do it rather here. - */ - skb_key.basic.n_proto = skb->protocol; - skb_flow_dissect_tunnel_info(skb, &head->dissector, &skb_key); - skb_flow_dissect(skb, &head->dissector, &skb_key, 0); + skb_key.indev_ifindex = skb->skb_iif; + /* skb_flow_dissect() does not set n_proto in case an unknown + * protocol, so do it rather here. 
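With this change fl_classify walks head->masks and, for each mask, hashes the packet key AND-ed with that mask over the mask's meaningful byte range. A hypothetical rendition of the masking step (the real fl_set_masked_key is unchanged by this hunk and not shown):

#include <stddef.h>

/* Produce the masked lookup key: key AND mask, byte-wise, over the
 * [start, end) range that fl_mask_update_range computed above. */
static void set_masked_key(unsigned char *mkey, const unsigned char *key,
			   const unsigned char *mask, size_t start, size_t end)
{
	for (size_t i = start; i < end; i++)
		mkey[i] = key[i] & mask[i];
}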
+ */ + skb_key.basic.n_proto = skb->protocol; + skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key); + skb_flow_dissect(skb, &mask->dissector, &skb_key, 0); - fl_set_masked_key(&skb_mkey, &skb_key, &head->mask); + fl_set_masked_key(&skb_mkey, &skb_key, mask); - f = fl_lookup(head, &skb_mkey); - if (f && !tc_skip_sw(f->flags)) { - *res = f->res; - return tcf_exts_exec(skb, &f->exts, res); + f = fl_lookup(mask, &skb_mkey); + if (f && !tc_skip_sw(f->flags)) { + *res = f->res; + return tcf_exts_exec(skb, &f->exts, res); + } } return -1; } @@ -187,11 +202,28 @@ static int fl_init(struct tcf_proto *tp) if (!head) return -ENOBUFS; - INIT_LIST_HEAD_RCU(&head->filters); + INIT_LIST_HEAD_RCU(&head->masks); rcu_assign_pointer(tp->root, head); idr_init(&head->handle_idr); - return 0; + return rhashtable_init(&head->ht, &mask_ht_params); +} + +static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask, + bool async) +{ + if (!list_empty(&mask->filters)) + return false; + + rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params); + rhashtable_destroy(&mask->ht); + list_del_rcu(&mask->list); + if (async) + kfree_rcu(mask, rcu); + else + kfree(mask); + + return true; } static void __fl_destroy_filter(struct cls_fl_filter *f) @@ -234,8 +266,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, } static int fl_hw_replace_filter(struct tcf_proto *tp, - struct flow_dissector *dissector, - struct fl_flow_key *mask, struct cls_fl_filter *f, struct netlink_ext_ack *extack) { @@ -247,8 +277,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; - cls_flower.dissector = dissector; - cls_flower.mask = mask; + cls_flower.dissector = &f->mask->dissector; + cls_flower.mask = &f->mask->key; cls_flower.key = &f->mkey; cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; @@ -283,28 +313,31 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) &cls_flower, false); } -static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, +static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); + bool async = tcf_exts_get_net(&f->exts); + bool last; idr_remove(&head->handle_idr, f->handle); list_del_rcu(&f->list); + last = fl_mask_put(head, f->mask, async); if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f, extack); tcf_unbind_filter(tp, &f->res); - if (tcf_exts_get_net(&f->exts)) + if (async) call_rcu(&f->rcu, fl_destroy_filter); else __fl_destroy_filter(f); + + return last; } static void fl_destroy_sleepable(struct work_struct *work) { struct cls_fl_head *head = container_of(work, struct cls_fl_head, work); - if (head->mask_assigned) - rhashtable_destroy(&head->ht); kfree(head); module_put(THIS_MODULE); } @@ -320,10 +353,15 @@ static void fl_destroy_rcu(struct rcu_head *rcu) static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); + struct fl_flow_mask *mask, *next_mask; struct cls_fl_filter *f, *next; - list_for_each_entry_safe(f, next, &head->filters, list) - __fl_delete(tp, f, extack); + list_for_each_entry_safe(mask, next_mask, &head->masks, list) { + list_for_each_entry_safe(f, next, &mask->filters, list) { + if (__fl_delete(tp, f, extack)) + break; + } + } 
idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); @@ -715,14 +753,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb, return ret; } -static bool fl_mask_eq(struct fl_flow_mask *mask1, - struct fl_flow_mask *mask2) +static void fl_mask_copy(struct fl_flow_mask *dst, + struct fl_flow_mask *src) { - const long *lmask1 = fl_key_get_start(&mask1->key, mask1); - const long *lmask2 = fl_key_get_start(&mask2->key, mask2); + const void *psrc = fl_key_get_start(&src->key, src); + void *pdst = fl_key_get_start(&dst->key, src); - return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) && - !memcmp(lmask1, lmask2, fl_mask_range(mask1)); + memcpy(pdst, psrc, fl_mask_range(src)); + dst->range = src->range; } static const struct rhashtable_params fl_ht_params = { @@ -731,14 +769,13 @@ static const struct rhashtable_params fl_ht_params = { .automatic_shrinking = true, }; -static int fl_init_hashtable(struct cls_fl_head *head, - struct fl_flow_mask *mask) +static int fl_init_mask_hashtable(struct fl_flow_mask *mask) { - head->ht_params = fl_ht_params; - head->ht_params.key_len = fl_mask_range(mask); - head->ht_params.key_offset += mask->range.start; + mask->filter_ht_params = fl_ht_params; + mask->filter_ht_params.key_len = fl_mask_range(mask); + mask->filter_ht_params.key_offset += mask->range.start; - return rhashtable_init(&head->ht, &head->ht_params); + return rhashtable_init(&mask->ht, &mask->filter_ht_params); } #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) @@ -761,8 +798,7 @@ static int fl_init_hashtable(struct cls_fl_head *head, FL_KEY_SET(keys, cnt, id, member); \ } while(0); -static void fl_init_dissector(struct cls_fl_head *head, - struct fl_flow_mask *mask) +static void fl_init_dissector(struct fl_flow_mask *mask) { struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX]; size_t cnt = 0; @@ -802,31 +838,66 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); - skb_flow_dissector_init(&head->dissector, keys, cnt); + skb_flow_dissector_init(&mask->dissector, keys, cnt); +} + +static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + struct fl_flow_mask *newmask; + int err; + + newmask = kzalloc(sizeof(*newmask), GFP_KERNEL); + if (!newmask) + return ERR_PTR(-ENOMEM); + + fl_mask_copy(newmask, mask); + + err = fl_init_mask_hashtable(newmask); + if (err) + goto errout_free; + + fl_init_dissector(newmask); + + INIT_LIST_HEAD_RCU(&newmask->filters); + + err = rhashtable_insert_fast(&head->ht, &newmask->ht_node, + mask_ht_params); + if (err) + goto errout_destroy; + + list_add_tail_rcu(&newmask->list, &head->masks); + + return newmask; + +errout_destroy: + rhashtable_destroy(&newmask->ht); +errout_free: + kfree(newmask); + + return ERR_PTR(err); } static int fl_check_assign_mask(struct cls_fl_head *head, + struct cls_fl_filter *fnew, + struct cls_fl_filter *fold, struct fl_flow_mask *mask) { - int err; + struct fl_flow_mask *newmask; - if (head->mask_assigned) { - if (!fl_mask_eq(&head->mask, mask)) + fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params); + if (!fnew->mask) { + if (fold) return -EINVAL; - else - return 0; - } - /* Mask is not assigned yet. So assign it and init hashtable - * according to that. 
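fl_init_mask_hashtable above narrows each mask's hash parameters so only the bytes the mask can make nonzero are hashed and compared: key_len shrinks to the mask range and key_offset is advanced past the leading zero bytes. Reduced to plain C (names hypothetical):

#include <stddef.h>

struct ht_params {
	size_t key_offset;	/* where the key starts inside the filter */
	size_t key_len;		/* bytes actually hashed and compared */
};

/* base_offset: offsetof(filter, mkey); range per fl_mask_update_range */
static struct ht_params params_for_mask(size_t base_offset,
					size_t range_start, size_t range_end)
{
	struct ht_params p = {
		.key_offset = base_offset + range_start,
		.key_len    = range_end - range_start,
	};
	return p;
}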
- */ - err = fl_init_hashtable(head, mask); - if (err) - return err; - memcpy(&head->mask, mask, sizeof(head->mask)); - head->mask_assigned = true; + newmask = fl_create_new_mask(head, mask); + if (IS_ERR(newmask)) + return PTR_ERR(newmask); - fl_init_dissector(head, mask); + fnew->mask = newmask; + } else if (fold && fold->mask == fnew->mask) { + return -EINVAL; + } return 0; } @@ -924,30 +995,26 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout_idr; - err = fl_check_assign_mask(head, &mask); + err = fl_check_assign_mask(head, fnew, fold, &mask); if (err) goto errout_idr; if (!tc_skip_sw(fnew->flags)) { - if (!fold && fl_lookup(head, &fnew->mkey)) { + if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) { err = -EEXIST; - goto errout_idr; + goto errout_mask; } - err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, - head->ht_params); + err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); if (err) - goto errout_idr; + goto errout_mask; } if (!tc_skip_hw(fnew->flags)) { - err = fl_hw_replace_filter(tp, - &head->dissector, - &mask.key, - fnew, - extack); + err = fl_hw_replace_filter(tp, fnew, extack); if (err) - goto errout_idr; + goto errout_mask; } if (!tc_in_hw(fnew->flags)) @@ -955,8 +1022,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (fold) { if (!tc_skip_sw(fold->flags)) - rhashtable_remove_fast(&head->ht, &fold->ht_node, - head->ht_params); + rhashtable_remove_fast(&fold->mask->ht, + &fold->ht_node, + fold->mask->filter_ht_params); if (!tc_skip_hw(fold->flags)) fl_hw_destroy_filter(tp, fold, NULL); } @@ -970,12 +1038,15 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, fl_destroy_filter); } else { - list_add_tail_rcu(&fnew->list, &head->filters); + list_add_tail_rcu(&fnew->list, &fnew->mask->filters); } kfree(tb); return 0; +errout_mask: + fl_mask_put(head, fnew->mask, false); + errout_idr: if (fnew->handle) idr_remove(&head->handle_idr, fnew->handle); @@ -994,10 +1065,10 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, struct cls_fl_filter *f = arg; if (!tc_skip_sw(f->flags)) - rhashtable_remove_fast(&head->ht, &f->ht_node, - head->ht_params); + rhashtable_remove_fast(&f->mask->ht, &f->ht_node, + f->mask->filter_ht_params); __fl_delete(tp, f, extack); - *last = list_empty(&head->filters); + *last = list_empty(&head->masks); return 0; } @@ -1005,16 +1076,19 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; - - list_for_each_entry_rcu(f, &head->filters, list) { - if (arg->count < arg->skip) - goto skip; - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; - break; - } + struct fl_flow_mask *mask; + + list_for_each_entry_rcu(mask, &head->masks, list) { + list_for_each_entry_rcu(f, &mask->filters, list) { + if (arg->count < arg->skip) + goto skip; + if (arg->fn(tp, f, arg) < 0) { + arg->stop = 1; + break; + } skip: - arg->count++; + arg->count++; + } } } @@ -1150,7 +1224,6 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = fh; struct nlattr *nest; struct fl_flow_key *key, *mask; @@ -1169,7 +1242,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, goto 
nla_put_failure; key = &f->key; - mask = &head->mask.key; + mask = &f->mask->key; if (mask->indev_ifindex) { struct net_device *dev; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 837806dd5799..039fdb862b17 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -652,33 +652,20 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, */ peer->param_flags = asoc->param_flags; - sctp_transport_route(peer, NULL, sp); - /* Initialize the pmtu of the transport. */ - if (peer->param_flags & SPP_PMTUD_DISABLE) { - if (asoc->pathmtu) - peer->pathmtu = asoc->pathmtu; - else - peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT; - } + sctp_transport_route(peer, NULL, sp); /* If this is the first transport addr on this association, * initialize the association PMTU to the peer's PMTU. * If not and the current association PMTU is higher than the new * peer's PMTU, reset the association PMTU to the new peer's PMTU. */ - if (asoc->pathmtu) - asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu); - else - asoc->pathmtu = peer->pathmtu; - - pr_debug("%s: association:%p PMTU set to %d\n", __func__, asoc, - asoc->pathmtu); + sctp_assoc_set_pmtu(asoc, asoc->pathmtu ? + min_t(int, peer->pathmtu, asoc->pathmtu) : + peer->pathmtu); peer->pmtu_pending = 0; - asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); - /* The asoc->peer.port might not be meaningful yet, but * initialize the packet structure anyway. */ @@ -988,31 +975,6 @@ out: return match; } -/* Is this the association we are looking for? */ -struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, - struct net *net, - const union sctp_addr *laddr, - const union sctp_addr *paddr) -{ - struct sctp_transport *transport; - - if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) && - (htons(asoc->peer.port) == paddr->v4.sin_port) && - net_eq(sock_net(asoc->base.sk), net)) { - transport = sctp_assoc_lookup_paddr(asoc, paddr); - if (!transport) - goto out; - - if (sctp_bind_addr_match(&asoc->base.bind_addr, laddr, - sctp_sk(asoc->base.sk))) - goto out; - } - transport = NULL; - -out: - return transport; -} - /* Do delayed input processing. This is scheduled by sctp_rcv(). */ static void sctp_assoc_bh_rcv(struct work_struct *work) { @@ -1406,6 +1368,31 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc, } } +void sctp_assoc_update_frag_point(struct sctp_association *asoc) +{ + int frag = sctp_mtu_payload(sctp_sk(asoc->base.sk), asoc->pathmtu, + sctp_datachk_len(&asoc->stream)); + + if (asoc->user_frag) + frag = min_t(int, frag, asoc->user_frag); + + frag = min_t(int, frag, SCTP_MAX_CHUNK_LEN - + sctp_datachk_len(&asoc->stream)); + + asoc->frag_point = SCTP_TRUNC4(frag); +} + +void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu) +{ + if (asoc->pathmtu != pmtu) { + asoc->pathmtu = pmtu; + sctp_assoc_update_frag_point(asoc); + } + + pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc, + asoc->pathmtu, asoc->frag_point); +} + /* Update the association's pmtu and frag_point by going through all the * transports. This routine is called when a transport's PMTU has changed. */ @@ -1418,24 +1405,16 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) return; /* Get the lowest pmtu of all the transports. 
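sctp_assoc_update_frag_point above boils down to one arithmetic rule: the frag point is the smallest of the path-MTU payload, any user-set maxseg, and the largest legal chunk, truncated to a 4-byte multiple. A stand-alone version of the computation; mtu_payload stands in for what sctp_mtu_payload returns after subtracting per-AF headers, and SCTP_MAX_CHUNK_LEN is quoted as 65532 for illustration:

#define SCTP_TRUNC4(x)     ((x) & ~3U)
#define SCTP_MAX_CHUNK_LEN 65532	/* illustrative value */

static unsigned int frag_point(unsigned int mtu_payload,
			       unsigned int user_frag,
			       unsigned int datachk_len)
{
	unsigned int frag = mtu_payload;

	if (user_frag && user_frag < frag)
		frag = user_frag;
	if (frag > SCTP_MAX_CHUNK_LEN - datachk_len)
		frag = SCTP_MAX_CHUNK_LEN - datachk_len;

	return SCTP_TRUNC4(frag);
}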
- list_for_each_entry(t, &asoc->peer.transport_addr_list, - transports) { + list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu( - t, SCTP_TRUNC4(dst_mtu(t->dst))); + sctp_transport_update_pmtu(t, sctp_dst_mtu(t->dst)); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) pmtu = t->pathmtu; } - if (pmtu) { - asoc->pathmtu = pmtu; - asoc->frag_point = sctp_frag_point(asoc, pmtu); - } - - pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc, - asoc->pathmtu, asoc->frag_point); + sctp_assoc_set_pmtu(asoc, pmtu); } /* Should we send a SACK to update our peer? */ diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index be296d633e95..79daa98208c3 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -172,8 +172,6 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, struct list_head *pos, *temp; struct sctp_chunk *chunk; struct sctp_datamsg *msg; - struct sctp_sock *sp; - struct sctp_af *af; int err; msg = sctp_datamsg_new(GFP_KERNEL); @@ -192,12 +190,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* This is the biggest possible DATA chunk that can fit into * the packet */ - sp = sctp_sk(asoc->base.sk); - af = sp->pf->af; - max_data = asoc->pathmtu - af->net_header_len - - sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream) - - af->ip_options_len(asoc->base.sk); - max_data = SCTP_TRUNC4(max_data); + max_data = asoc->frag_point; /* If the peer requested that we authenticate DATA chunks * we need to account for bundling of the AUTH chunks along with @@ -222,9 +215,6 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, } } - /* Check what's our max considering the above */ - max_data = min_t(size_t, max_data, asoc->frag_point); - /* Set first_len and then account for possible bundles on first frag */ first_len = max_data; diff --git a/net/sctp/output.c b/net/sctp/output.c index 690d8557bb7b..e672dee302c7 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -90,8 +90,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; + struct sctp_sock *sp = NULL; struct sock *sk; - size_t overhead = sizeof(struct ipv6hdr) + sizeof(struct sctphdr); pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); packet->vtag = vtag; @@ -102,28 +102,20 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, /* set packet max_size with pathmtu, then calculate overhead */ packet->max_size = tp->pathmtu; + if (asoc) { - struct sctp_sock *sp = sctp_sk(asoc->base.sk); - struct sctp_af *af = sp->pf->af; - - overhead = af->net_header_len + - af->ip_options_len(asoc->base.sk); - overhead += sizeof(struct sctphdr); - packet->overhead = overhead; - packet->size = overhead; - } else { - packet->overhead = overhead; - packet->size = overhead; - return; + sk = asoc->base.sk; + sp = sctp_sk(sk); } + packet->overhead = sctp_mtu_payload(sp, 0, 0); + packet->size = packet->overhead; + + if (!asoc) + return; /* update dst or transport pathmtu if in need */ - sk = asoc->base.sk; if (!sctp_transport_dst_check(tp)) { - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(asoc); - } else if (!sctp_transport_pmtu_check(tp)) { + sctp_transport_route(tp, NULL, sp); if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); } diff --git a/net/sctp/outqueue.c 
b/net/sctp/outqueue.c index f211b3db6a35..dee7cbd54831 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1457,7 +1457,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * the outstanding bytes for this chunk, so only * count bytes associated with a transport. */ - if (transport) { + if (transport && !tchunk->tsn_gap_acked) { /* If this chunk is being used for RTT * measurement, calculate the RTT and update * the RTO using this value. @@ -1469,14 +1469,34 @@ static void sctp_check_transmitted(struct sctp_outq *q, * first instance of the packet or a later * instance). */ - if (!tchunk->tsn_gap_acked && - !sctp_chunk_retransmitted(tchunk) && + if (!sctp_chunk_retransmitted(tchunk) && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; sctp_transport_update_rto(transport, rtt); } + + if (TSN_lte(tsn, sack_ctsn)) { + /* + * SFR-CACC algorithm: + * 2) If the SACK contains gap acks + * and the flag CHANGEOVER_ACTIVE is + * set the receiver of the SACK MUST + * take the following action: + * + * B) For each TSN t being acked that + * has not been acked in any SACK so + * far, set cacc_saw_newack to 1 for + * the destination that the TSN was + * sent to. + */ + if (sack->num_gap_ack_blocks && + q->asoc->peer.primary_path->cacc. + changeover_active) + transport->cacc.cacc_saw_newack + = 1; + } } /* If the chunk hasn't been marked as ACKED, @@ -1508,28 +1528,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, restart_timer = 1; forward_progress = true; - if (!tchunk->tsn_gap_acked) { - /* - * SFR-CACC algorithm: - * 2) If the SACK contains gap acks - * and the flag CHANGEOVER_ACTIVE is - * set the receiver of the SACK MUST - * take the following action: - * - * B) For each TSN t being acked that - * has not been acked in any SACK so - * far, set cacc_saw_newack to 1 for - * the destination that the TSN was - * sent to. - */ - if (transport && - sack->num_gap_ack_blocks && - q->asoc->peer.primary_path->cacc. - changeover_active) - transport->cacc.cacc_saw_newack - = 1; - } - list_add_tail(&tchunk->transmitted_list, &q->sacked); } else { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 5a4fb1dc8400..4d7b3ccea078 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -81,8 +81,6 @@ static int sctp_process_param(struct sctp_association *asoc, gfp_t gfp); static void *sctp_addto_param(struct sctp_chunk *chunk, int len, const void *data); -static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, - const void *data); /* Control chunk destructor */ static void sctp_control_release_owner(struct sk_buff *skb) @@ -154,12 +152,11 @@ static const struct sctp_paramhdr prsctp_param = { cpu_to_be16(sizeof(struct sctp_paramhdr)), }; -/* A helper to initialize an op error inside a - * provided chunk, as most cause codes will be embedded inside an - * abort chunk. +/* A helper to initialize an op error inside a provided chunk, as most + * cause codes will be embedded inside an abort chunk. */ -void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, - size_t paylen) +int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, + size_t paylen) { struct sctp_errhdr err; __u16 len; @@ -167,33 +164,16 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, /* Cause code constants are now defined in network order. 
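sctp_init_cause, rewritten here, now carries the tailroom check that used to justify a separate _fixed variant: build the error header, refuse with -ENOSPC when the chunk's skb cannot take it, and only then append. The shape of it over a plain buffer (hypothetical names, host byte order for brevity):

#include <errno.h>
#include <stdint.h>
#include <string.h>

struct errhdr {
	uint16_t cause;
	uint16_t length;	/* header + payload */
};

static int init_cause(uint8_t *tail, size_t room, uint16_t cause,
		      size_t paylen)
{
	struct errhdr err = {
		.cause  = cause,
		.length = (uint16_t)(sizeof(err) + paylen),
	};

	/* mirrors the skb_tailroom() test: never overrun the chunk */
	if (room < sizeof(err) + paylen)
		return -ENOSPC;

	memcpy(tail, &err, sizeof(err));
	return 0;
}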
*/ err.cause = cause_code; len = sizeof(err) + paylen; - err.length = htons(len); - chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err); -} - -/* A helper to initialize an op error inside a - * provided chunk, as most cause codes will be embedded inside an - * abort chunk. Differs from sctp_init_cause in that it won't oops - * if there isn't enough space in the op error chunk - */ -static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, - size_t paylen) -{ - struct sctp_errhdr err; - __u16 len; - - /* Cause code constants are now defined in network order. */ - err.cause = cause_code; - len = sizeof(err) + paylen; - err.length = htons(len); + err.length = htons(len); if (skb_tailroom(chunk->skb) < len) return -ENOSPC; - chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err); + chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err); return 0; } + /* 3.3.2 Initiation (INIT) (1) * * This chunk is used to initiate a SCTP association between two * @@ -779,10 +759,9 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, * association. This reports on which TSN's we've seen to date, * including duplicates and gaps. */ -struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) +struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc) { struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; - struct sctp_association *aptr = (struct sctp_association *)asoc; struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; __u16 num_gabs, num_dup_tsns; struct sctp_transport *trans; @@ -857,7 +836,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) /* Add the duplicate TSN information. */ if (num_dup_tsns) { - aptr->stats.idupchunks += num_dup_tsns; + asoc->stats.idupchunks += num_dup_tsns; sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns, sctp_tsnmap_get_dups(map)); } @@ -869,11 +848,11 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) * association so no transport will match after a wrap event like this, * Until the next sack */ - if (++aptr->peer.sack_generation == 0) { + if (++asoc->peer.sack_generation == 0) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) trans->sack_generation = 0; - aptr->peer.sack_generation = 1; + asoc->peer.sack_generation = 1; } nodata: return retval; @@ -1258,20 +1237,26 @@ nodata: return retval; } -/* Create an Operation Error chunk of a fixed size, - * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) - * This is a helper function to allocate an error chunk for - * for those invalid parameter codes in which we may not want - * to report all the errors, if the incoming chunk is large +/* Create an Operation Error chunk of a fixed size, specifically, + * min(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) - overheads. + * This is a helper function to allocate an error chunk for those + * invalid parameter codes in which we may not want to report all the + * errors, if the incoming chunk is large. If it can't fit in a single + * packet, we ignore it. */ -static inline struct sctp_chunk *sctp_make_op_error_fixed( +static inline struct sctp_chunk *sctp_make_op_error_limited( const struct sctp_association *asoc, const struct sctp_chunk *chunk) { - size_t size = asoc ? 
asoc->pathmtu : 0; + size_t size = SCTP_DEFAULT_MAXSEGMENT; + struct sctp_sock *sp = NULL; - if (!size) - size = SCTP_DEFAULT_MAXSEGMENT; + if (asoc) { + size = min_t(size_t, size, asoc->pathmtu); + sp = sctp_sk(asoc->base.sk); + } + + size = sctp_mtu_payload(sp, size, sizeof(struct sctp_errhdr)); return sctp_make_op_error_space(asoc, chunk, size); } @@ -1523,18 +1508,6 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) return target; } -/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient - * space in the chunk - */ -static void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, - int len, const void *data) -{ - if (skb_tailroom(chunk->skb) >= len) - return sctp_addto_chunk(chunk, len, data); - else - return NULL; -} - /* Append bytes from user space to the end of a chunk. Will panic if * chunk is not big enough. * Returns a kernel err value. @@ -1829,6 +1802,9 @@ no_hmac: kt = ktime_get_real(); if (!asoc && ktime_before(bear_cookie->expiration, kt)) { + suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration)); + __be32 n = htonl(usecs); + /* * Section 3.3.10.3 Stale Cookie Error (3) * @@ -1837,17 +1813,12 @@ no_hmac: * Stale Cookie Error: Indicates the receipt of a valid State * Cookie that has expired. */ - len = ntohs(chunk->chunk_hdr->length); - *errp = sctp_make_op_error_space(asoc, chunk, len); - if (*errp) { - suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration)); - __be32 n = htonl(usecs); - - sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, - sizeof(n)); - sctp_addto_chunk(*errp, sizeof(n), &n); + *errp = sctp_make_op_error(asoc, chunk, + SCTP_ERROR_STALE_COOKIE, &n, + sizeof(n), 0); + if (*errp) *error = -SCTP_IERROR_STALE_COOKIE; - } else + else *error = -SCTP_IERROR_NOMEM; goto fail; @@ -1998,12 +1969,8 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, if (*errp) sctp_chunk_free(*errp); - *errp = sctp_make_op_error_space(asoc, chunk, len); - - if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_DNS_FAILED, len); - sctp_addto_chunk(*errp, len, param.v); - } + *errp = sctp_make_op_error(asoc, chunk, SCTP_ERROR_DNS_FAILED, + param.v, len, 0); /* Stop processing this chunk. */ return 0; @@ -2128,23 +2095,23 @@ static enum sctp_ierror sctp_process_unk_param( /* Make an ERROR chunk, preparing enough room for * returning multiple unknown parameters. */ - if (NULL == *errp) - *errp = sctp_make_op_error_fixed(asoc, chunk); - - if (*errp) { - if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, - SCTP_PAD4(ntohs(param.p->length)))) - sctp_addto_chunk_fixed(*errp, - SCTP_PAD4(ntohs(param.p->length)), - param.v); - } else { - /* If there is no memory for generating the ERROR - * report as specified, an ABORT will be triggered - * to the peer and the association won't be - * established. - */ - retval = SCTP_IERROR_NOMEM; + if (!*errp) { + *errp = sctp_make_op_error_limited(asoc, chunk); + if (!*errp) { + /* If there is no memory for generating the + * ERROR report as specified, an ABORT will be + * triggered to the peer and the association + * won't be established. + */ + retval = SCTP_IERROR_NOMEM; + break; + } } + + if (!sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, + ntohs(param.p->length))) + sctp_addto_chunk(*errp, ntohs(param.p->length), + param.v); break; default: break; @@ -2220,10 +2187,10 @@ static enum sctp_ierror sctp_verify_param(struct net *net, * MUST be aborted. The ABORT chunk SHOULD contain the error * cause 'Protocol Violation'. 
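sctp_make_op_error_limited above sizes the error chunk to what one packet can carry: start from the smaller of the path MTU and SCTP_DEFAULT_MAXSEGMENT, then let sctp_mtu_payload subtract the transport overhead plus the error-header size. As a pure function, with the header accounting folded into one assumed overhead input:

#define SCTP_DEFAULT_MAXSEGMENT 1500

/* overhead: the IP + SCTP + chunk header bytes that sctp_mtu_payload
 * subtracts; errhdr_len: sizeof(struct sctp_errhdr). */
static size_t op_error_limit(size_t pathmtu, size_t overhead,
			     size_t errhdr_len)
{
	size_t size = SCTP_DEFAULT_MAXSEGMENT;

	if (pathmtu && pathmtu < size)
		size = pathmtu;

	return size - overhead - errhdr_len;
}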
*/ - if (SCTP_AUTH_RANDOM_LENGTH != - ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) { + if (SCTP_AUTH_RANDOM_LENGTH != ntohs(param.p->length) - + sizeof(struct sctp_paramhdr)) { sctp_process_inv_paramlength(asoc, param.p, - chunk, err_chunk); + chunk, err_chunk); retval = SCTP_IERROR_ABORT; } break; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 80835ac26d2c..1b4593b842b0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -644,16 +644,15 @@ static int sctp_send_asconf_add_ip(struct sock *sk, list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { - /* Clear the source and route cache */ - sctp_transport_dst_release(trans); trans->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); trans->ssthresh = asoc->peer.i.a_rwnd; trans->rto = asoc->rto_initial; sctp_max_rto(asoc, trans); trans->rtt = trans->srtt = trans->rttvar = 0; + /* Clear the source and route cache */ sctp_transport_route(trans, NULL, - sctp_sk(asoc->base.sk)); + sctp_sk(asoc->base.sk)); } } retval = sctp_send_asconf(asoc, chunk); @@ -896,7 +895,6 @@ skip_mkasconf: */ list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - sctp_transport_dst_release(transport); sctp_transport_route(transport, NULL, sctp_sk(asoc->base.sk)); } @@ -1895,6 +1893,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo) { struct sock *sk = asoc->base.sk; + struct sctp_sock *sp = sctp_sk(sk); struct net *net = sock_net(sk); struct sctp_datamsg *datamsg; bool wait_connect = false; @@ -1913,13 +1912,16 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, goto err; } - if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) { + if (sp->disable_fragments && msg_len > asoc->frag_point) { err = -EMSGSIZE; goto err; } - if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(asoc); + if (asoc->pmtu_pending) { + if (sp->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); + asoc->pmtu_pending = 0; + } if (sctp_wspace(asoc) < msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); @@ -1936,7 +1938,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, if (err) goto err; - if (sctp_sk(sk)->strm_interleave) { + if (sp->strm_interleave) { timeo = sock_sndtimeo(sk, 0); err = sctp_wait_for_connect(asoc, &timeo); if (err) @@ -2539,7 +2541,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, trans->pathmtu = params->spp_pathmtu; sctp_assoc_sync_pmtu(asoc); } else if (asoc) { - asoc->pathmtu = params->spp_pathmtu; + sctp_assoc_set_pmtu(asoc, params->spp_pathmtu); } else { sp->pathmtu = params->spp_pathmtu; } @@ -3209,7 +3211,6 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct sctp_af *af = sp->pf->af; struct sctp_assoc_value params; struct sctp_association *asoc; int val; @@ -3231,30 +3232,24 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned return -EINVAL; } + asoc = sctp_id2assoc(sk, params.assoc_id); + if (val) { int min_len, max_len; + __u16 datasize = asoc ? 
sctp_datachk_len(&asoc->stream) : + sizeof(struct sctp_data_chunk); - min_len = SCTP_DEFAULT_MINSEGMENT - af->net_header_len; - min_len -= af->ip_options_len(sk); - min_len -= sizeof(struct sctphdr) + - sizeof(struct sctp_data_chunk); - - max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk); + min_len = sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, + datasize); + max_len = SCTP_MAX_CHUNK_LEN - datasize; if (val < min_len || val > max_len) return -EINVAL; } - asoc = sctp_id2assoc(sk, params.assoc_id); if (asoc) { - if (val == 0) { - val = asoc->pathmtu - af->net_header_len; - val -= af->ip_options_len(sk); - val -= sizeof(struct sctphdr) + - sctp_datachk_len(&asoc->stream); - } asoc->user_frag = val; - asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); + sctp_assoc_update_frag_point(asoc); } else { if (params.assoc_id && sctp_style(sk, UDP)) return -EINVAL; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 47f82bd794d9..4a95e260b674 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -242,9 +242,18 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) &transport->fl, sk); } - if (transport->dst) { - transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); - } else + if (transport->param_flags & SPP_PMTUD_DISABLE) { + struct sctp_association *asoc = transport->asoc; + + if (!transport->pathmtu && asoc && asoc->pathmtu) + transport->pathmtu = asoc->pathmtu; + if (transport->pathmtu) + return; + } + + if (transport->dst) + transport->pathmtu = sctp_dst_mtu(transport->dst); + else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -290,6 +299,7 @@ void sctp_transport_route(struct sctp_transport *transport, struct sctp_association *asoc = transport->asoc; struct sctp_af *af = transport->af_specific; + sctp_transport_dst_release(transport); af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt)); if (saddr) @@ -297,21 +307,14 @@ void sctp_transport_route(struct sctp_transport *transport, else af->get_saddr(opt, transport, &transport->fl); - if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { - return; - } - if (transport->dst) { - transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); + sctp_transport_pmtu(transport, sctp_opt2sk(opt)); - /* Initialize sk->sk_rcv_saddr, if the transport is the - * association's active path for getsockname(). - */ - if (asoc && (!asoc->peer.primary_path || - (transport == asoc->peer.active_path))) - opt->pf->to_sk_saddr(&transport->saddr, - asoc->base.sk); - } else - transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; + /* Initialize sk->sk_rcv_saddr, if the transport is the + * association's active path for getsockname(). + */ + if (transport->dst && asoc && + (!asoc->peer.primary_path || transport == asoc->peer.active_path)) + opt->pf->to_sk_saddr(&transport->saddr, asoc->base.sk); } /* Hold a reference to a transport. 
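After this rework the PMTU decision in sctp_transport_pmtu reads as a fixed precedence: a PMTUD-disabled transport keeps its pinned value (inheriting the association's if it has none), otherwise the route's MTU wins, with SCTP_DEFAULT_MAXSEGMENT as the last resort. The same logic as a pure function; pass dst_mtu as 0 when there is no cached route:

#include <stdbool.h>
#include <stdint.h>

#define SCTP_DEFAULT_MAXSEGMENT 1500

static uint32_t pick_pathmtu(bool pmtud_disabled, uint32_t transport_pmtu,
			     uint32_t asoc_pmtu, uint32_t dst_mtu)
{
	if (pmtud_disabled) {
		if (!transport_pmtu)
			transport_pmtu = asoc_pmtu;	/* inherit fixed value */
		if (transport_pmtu)
			return transport_pmtu;		/* keep it pinned */
	}

	return dst_mtu ? dst_mtu : SCTP_DEFAULT_MAXSEGMENT;
}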
*/ diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 544bab42f925..95b00e4c3396 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -29,6 +29,7 @@ #include <net/sock.h> #include <net/tcp.h> #include <net/smc.h> +#include <asm/ioctls.h> #include "smc.h" #include "smc_clc.h" @@ -305,6 +306,7 @@ static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) static int smc_clnt_conf_first_link(struct smc_sock *smc) { + struct net *net = sock_net(smc->clcsock->sk); struct smc_link_group *lgr = smc->conn.lgr; struct smc_link *link; int rest; @@ -362,7 +364,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) if (rc < 0) return SMC_CLC_DECL_TCL; - link->state = SMC_LNK_ACTIVE; + smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); return 0; } @@ -400,6 +402,9 @@ static int smc_connect_rdma(struct smc_sock *smc) sock_hold(&smc->sk); /* sock put in passive closing */ + if (smc->use_fallback) + goto out_connected; + if (!tcp_sk(smc->clcsock->sk)->syn_smc) { /* peer has not signalled SMC-capability */ smc->use_fallback = true; @@ -716,6 +721,7 @@ void smc_close_non_accepted(struct sock *sk) static int smc_serv_conf_first_link(struct smc_sock *smc) { + struct net *net = sock_net(smc->clcsock->sk); struct smc_link_group *lgr = smc->conn.lgr; struct smc_link *link; int rest; @@ -768,7 +774,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) return rc; } - link->state = SMC_LNK_ACTIVE; + smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); return 0; } @@ -792,6 +798,9 @@ static void smc_listen_work(struct work_struct *work) int rc = 0; u8 ibport; + if (new_smc->use_fallback) + goto out_connected; + /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { new_smc->use_fallback = true; @@ -965,7 +974,7 @@ static void smc_tcp_listen_work(struct work_struct *work) continue; new_smc->listen_smc = lsmc; - new_smc->use_fallback = false; /* assume rdma capability first*/ + new_smc->use_fallback = lsmc->use_fallback; sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); @@ -1001,7 +1010,8 @@ static int smc_listen(struct socket *sock, int backlog) * them to the clc socket -- copy smc socket options to clc socket */ smc_copy_sock_settings_to_clc(smc); - tcp_sk(smc->clcsock->sk)->syn_smc = 1; + if (!smc->use_fallback) + tcp_sk(smc->clcsock->sk)->syn_smc = 1; rc = kernel_listen(smc->clcsock, backlog); if (rc) @@ -1034,6 +1044,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, if (lsmc->sk.sk_state != SMC_LISTEN) { rc = -EINVAL; + release_sock(sk); goto out; } @@ -1061,9 +1072,29 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, if (!rc) rc = sock_error(nsk); + release_sock(sk); + if (rc) + goto out; + + if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) { + /* wait till data arrives on the socket */ + timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept * + MSEC_PER_SEC); + if (smc_sk(nsk)->use_fallback) { + struct sock *clcsk = smc_sk(nsk)->clcsock->sk; + + lock_sock(clcsk); + if (skb_queue_empty(&clcsk->sk_receive_queue)) + sk_wait_data(clcsk, &timeo, NULL); + release_sock(clcsk); + } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) { + lock_sock(nsk); + smc_rx_wait_data(smc_sk(nsk), &timeo); + release_sock(nsk); + } + } out: - release_sock(sk); sock_put(sk); /* sock_hold above */ return rc; } @@ -1094,6 +1125,16 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t 
len) (sk->sk_state != SMC_APPCLOSEWAIT1) && (sk->sk_state != SMC_INIT)) goto out; + + if (msg->msg_flags & MSG_FASTOPEN) { + if (sk->sk_state == SMC_INIT) { + smc->use_fallback = true; + } else { + rc = -EINVAL; + goto out; + } + } + if (smc->use_fallback) rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len); else @@ -1273,14 +1314,64 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct smc_sock *smc; + int val, rc; smc = smc_sk(sk); /* generic setsockopts reaching us here always apply to the * CLC socket */ - return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, - optval, optlen); + rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, + optval, optlen); + if (smc->clcsock->sk->sk_err) { + sk->sk_err = smc->clcsock->sk->sk_err; + sk->sk_error_report(sk); + } + if (rc) + return rc; + + if (optlen < sizeof(int)) + return rc; + get_user(val, (int __user *)optval); + + lock_sock(sk); + switch (optname) { + case TCP_ULP: + case TCP_FASTOPEN: + case TCP_FASTOPEN_CONNECT: + case TCP_FASTOPEN_KEY: + case TCP_FASTOPEN_NO_COOKIE: + /* option not supported by SMC */ + if (sk->sk_state == SMC_INIT) { + smc->use_fallback = true; + } else { + if (!smc->use_fallback) + rc = -EINVAL; + } + break; + case TCP_NODELAY: + if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) { + if (val) + mod_delayed_work(system_wq, &smc->conn.tx_work, + 0); + } + break; + case TCP_CORK: + if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) { + if (!val) + mod_delayed_work(system_wq, &smc->conn.tx_work, + 0); + } + break; + case TCP_DEFER_ACCEPT: + smc->sockopt_defer_accept = val; + break; + default: + break; + } + release_sock(sk); + + return rc; } static int smc_getsockopt(struct socket *sock, int level, int optname, @@ -1298,12 +1389,38 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct smc_sock *smc; + int answ; smc = smc_sk(sock->sk); - if (smc->use_fallback) + if (smc->use_fallback) { + if (!smc->clcsock) + return -EBADF; return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); - else - return sock_no_ioctl(sock, cmd, arg); + } + switch (cmd) { + case SIOCINQ: /* same as FIONREAD */ + if (smc->sk.sk_state == SMC_LISTEN) + return -EINVAL; + answ = atomic_read(&smc->conn.bytes_to_rcv); + break; + case SIOCOUTQ: + /* output queue size (not send + not acked) */ + if (smc->sk.sk_state == SMC_LISTEN) + return -EINVAL; + answ = smc->conn.sndbuf_size - + atomic_read(&smc->conn.sndbuf_space); + break; + case SIOCOUTQNSD: + /* output queue size (not send only) */ + if (smc->sk.sk_state == SMC_LISTEN) + return -EINVAL; + answ = smc_tx_prepared_sends(&smc->conn); + break; + default: + return -ENOIOCTLCMD; + } + + return put_user(answ, (int __user *)arg); } static ssize_t smc_sendpage(struct socket *sock, struct page *page, diff --git a/net/smc/smc.h b/net/smc/smc.h index e4829a2f46ba..2405e889b93d 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -180,6 +180,10 @@ struct smc_sock { /* smc sock container */ struct list_head accept_q; /* sockets to be accepted */ spinlock_t accept_q_lock; /* protects accept_q */ bool use_fallback; /* fallback to tcp */ + int sockopt_defer_accept; + /* sockopt TCP_DEFER_ACCEPT + * value + */ u8 wait_close_tx_prepared : 1; /* shutdown wr or close * started, waiting for unsent diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index b42395d24cba..42ad57365eca 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -82,7 +82,7 @@ static inline void 
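
Taken together, the smc_setsockopt() and smc_ioctl() additions above let TCP-style applications run unmodified over AF_SMC: TCP_DEFER_ACCEPT is emulated at accept() time, and SIOCINQ/SIOCOUTQ report buffer fill levels. A sketch of the userspace side, assuming AF_SMC is 43 and SMCPROTO_SMC is 0 as in the kernel headers of this era (error handling elided):

#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/sockios.h>

#ifndef AF_SMC
#define AF_SMC 43               /* include/linux/socket.h */
#endif
#define SMCPROTO_SMC 0          /* SMC over IPv4 */

int main(void)
{
        int defer_secs = 1, avail = 0;
        int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);

        if (fd < 0)
                return 1;

        /* Emulated by smc_accept(): wait up to 1s for data on new sockets. */
        setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT,
                   &defer_secs, sizeof(defer_secs));

        /* ... bind()/listen()/accept() or connect() exactly as with TCP ... */

        /* Served by the new SIOCINQ branch in smc_ioctl(). */
        ioctl(fd, SIOCINQ, &avail);
        printf("unread bytes: %d\n", avail);
        close(fd);
        return 0;
}
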
smc_cdc_add_pending_send(struct smc_connection *conn, sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE, "must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)"); BUILD_BUG_ON_MSG( - offsetof(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE, + sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE, "must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()"); BUILD_BUG_ON_MSG( sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE, diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index ab240b37ad11..d2012fd22100 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -48,7 +48,7 @@ struct smc_cdc_msg { struct smc_cdc_producer_flags prod_flags; struct smc_cdc_conn_state_flags conn_state_flags; u8 reserved[18]; -} __aligned(8); +} __packed; /* format defined in RFC7609 */ static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn) { diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d4bd01bb44e1..79aa7d312d6b 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -326,6 +326,7 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) /* remove a link group */ void smc_lgr_free(struct smc_link_group *lgr) { + smc_llc_link_flush(&lgr->lnk[SMC_SINGLE_LINK]); smc_lgr_free_bufs(lgr); smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); kfree(lgr); @@ -348,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) struct rb_node *node; smc_lgr_forget(lgr); + smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); write_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); @@ -374,7 +376,8 @@ void smc_lgr_terminate(struct smc_link_group *lgr) static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) { struct dst_entry *dst = sk_dst_get(clcsock->sk); - int rc = 0; + struct net_device *ndev; + int i, nest_lvl, rc = 0; *vlan_id = 0; if (!dst) { @@ -386,8 +389,27 @@ static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) goto out_rel; } - if (is_vlan_dev(dst->dev)) - *vlan_id = vlan_dev_vlan_id(dst->dev); + ndev = dst->dev; + if (is_vlan_dev(ndev)) { + *vlan_id = vlan_dev_vlan_id(ndev); + goto out_rel; + } + + rtnl_lock(); + nest_lvl = dev_get_nest_level(ndev); + for (i = 0; i < nest_lvl; i++) { + struct list_head *lower = &ndev->adj_list.lower; + + if (list_empty(lower)) + break; + lower = lower->next; + ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower); + if (is_vlan_dev(ndev)) { + *vlan_id = vlan_dev_vlan_id(ndev); + break; + } + } + rtnl_unlock(); out_rel: dst_release(dst); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 5dfcb15d529f..70ef4f47dc8d 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -79,6 +79,7 @@ struct smc_link { dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ u64 wr_rx_id; /* seq # of last recv WR */ u32 wr_rx_cnt; /* number of WR recv buffers */ + unsigned long wr_rx_tstamp; /* jiffies when last buf rx */ struct ib_reg_wr wr_reg; /* WR register memory region */ wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ @@ -101,6 +102,9 @@ struct smc_link { int llc_confirm_resp_rc; /* rc from conf_resp msg */ struct completion llc_add; /* wait for rx of add link */ struct completion llc_add_resp; /* wait for rx of add link rsp*/ + struct delayed_work llc_testlink_wrk; /* testlink worker */ + struct completion llc_testlink_resp; /* wait for rx of testlink */ + int llc_testlink_time; /* testlink interval */ }; /* 
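
The smc_cdc.h hunk above trades __aligned(8) for __packed because a CDC message is an on-the-wire format whose size must match SMC_WR_TX_SIZE exactly; alignment padding would silently grow it, which is precisely what the tightened BUILD_BUG_ON now catches at compile time. A toy illustration of the difference (hypothetical struct, not the real smc_cdc_msg):

#include <assert.h>
#include <stdint.h>

/* 5 bytes of useful data in both layouts. */
struct wire_msg_packed {
        uint32_t token;
        uint8_t  flags;
} __attribute__((packed));

struct wire_msg_padded {                /* no packing attribute */
        uint32_t token;
        uint8_t  flags;
};

int main(void)
{
        /* Packed: exactly the sum of the members, safe as a wire format. */
        _Static_assert(sizeof(struct wire_msg_packed) == 5, "wire size");

        /* Unpacked: tail padding rounds the size up to the alignment of
         * uint32_t (8 bytes on common ABIs), the kind of silent growth a
         * BUILD_BUG_ON like the one above exists to catch.
         */
        assert(sizeof(struct wire_msg_padded) == 8);
        return 0;
}
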
For now we just allow one parallel link per link group. The SMC protocol diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 427b91c1c964..05dd7e6d314d 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -38,17 +38,27 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); - r->diag_family = sk->sk_family; if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); r->id.idiag_dport = smc->clcsock->sk->sk_dport; r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; sock_diag_save_cookie(sk, r->id.idiag_cookie); - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); - r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; - r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; + if (sk->sk_protocol == SMCPROTO_SMC) { + r->diag_family = PF_INET; + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; + r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_protocol == SMCPROTO_SMC6) { + r->diag_family = PF_INET6; + memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr, + sizeof(smc->clcsock->sk->sk_v6_rcv_saddr)); + memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr, + sizeof(smc->clcsock->sk->sk_v6_daddr)); +#endif + } } static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, @@ -153,7 +163,8 @@ errout: return -EMSGSIZE; } -static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb, + struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlattr *bc = NULL; @@ -161,8 +172,8 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct sock *sk; int rc = 0; - read_lock(&smc_proto.h.smc_hash->lock); - head = &smc_proto.h.smc_hash->ht; + read_lock(&prot->h.smc_hash->lock); + head = &prot->h.smc_hash->ht; if (hlist_empty(head)) goto out; @@ -175,7 +186,17 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } out: - read_unlock(&smc_proto.h.smc_hash->lock); + read_unlock(&prot->h.smc_hash->lock); + return rc; +} + +static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int rc = 0; + + rc = smc_diag_dump_proto(&smc_proto, skb, cb); + if (!rc) + rc = smc_diag_dump_proto(&smc_proto6, skb, cb); return rc; } diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index ea4b21981b4b..33b4d856f4c6 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -397,7 +397,8 @@ static void smc_llc_rx_test_link(struct smc_link *link, struct smc_llc_msg_test_link *llc) { if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - /* unused as long as we don't send this type of msg */ + if (link->state == SMC_LNK_ACTIVE) + complete(&link->llc_testlink_resp); } else { smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP); } @@ -502,6 +503,65 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) } } +/***************************** worker ****************************************/ + +static void smc_llc_testlink_work(struct work_struct *work) +{ + struct smc_link *link = container_of(to_delayed_work(work), + struct smc_link, llc_testlink_wrk); + unsigned long next_interval; + struct smc_link_group *lgr; + unsigned long expire_time; + u8 user_data[16] = { 0 }; + int rc; + + lgr = container_of(link, struct smc_link_group, 
lnk[SMC_SINGLE_LINK]); + if (link->state != SMC_LNK_ACTIVE) + return; /* don't reschedule worker */ + expire_time = link->wr_rx_tstamp + link->llc_testlink_time; + if (time_is_after_jiffies(expire_time)) { + next_interval = expire_time - jiffies; + goto out; + } + reinit_completion(&link->llc_testlink_resp); + smc_llc_send_test_link(link, user_data, SMC_LLC_REQ); + /* receive TEST LINK response over RoCE fabric */ + rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, + SMC_LLC_WAIT_TIME); + if (rc <= 0) { + smc_lgr_terminate(lgr); + return; + } + next_interval = link->llc_testlink_time; +out: + schedule_delayed_work(&link->llc_testlink_wrk, next_interval); +} + +void smc_llc_link_active(struct smc_link *link, int testlink_time) +{ + init_completion(&link->llc_testlink_resp); + INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work); + link->state = SMC_LNK_ACTIVE; + if (testlink_time) { + link->llc_testlink_time = testlink_time * HZ; + schedule_delayed_work(&link->llc_testlink_wrk, + link->llc_testlink_time); + } +} + +/* called in tasklet context */ +void smc_llc_link_inactive(struct smc_link *link) +{ + link->state = SMC_LNK_INACTIVE; + cancel_delayed_work(&link->llc_testlink_wrk); +} + +/* called in worker context */ +void smc_llc_link_flush(struct smc_link *link) +{ + cancel_delayed_work_sync(&link->llc_testlink_wrk); +} + /***************************** init, exit, misc ******************************/ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index e4a7d5e234d5..d6e42116485e 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -44,6 +44,9 @@ int smc_llc_send_delete_link(struct smc_link *link, enum smc_llc_reqresp reqresp); int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16], enum smc_llc_reqresp reqresp); +void smc_llc_link_active(struct smc_link *link, int testlink_time); +void smc_llc_link_inactive(struct smc_link *link); +void smc_llc_link_flush(struct smc_link *link); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */ diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index eff4e0d0bb31..af851d8df1f8 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -51,7 +51,7 @@ static void smc_rx_data_ready(struct sock *sk) * 1 if at least 1 byte available in rcvbuf or if socket error/shutdown. * 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted). 
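
The testlink worker above is a classic keepalive: if the link saw receive traffic within llc_testlink_time (seeded from the net.ipv4.tcp_keepalive_time sysctl), it only re-arms itself for the remainder of the interval; a quiet link triggers a TEST LINK probe, and a missing response tears the link group down. The scheduling decision, distilled into plain C with hypothetical names and the jiffies/completion machinery stripped out:

#include <stdio.h>

/* Hypothetical distillation of smc_llc_testlink_work()'s timing logic:
 * decide how long to sleep and whether to probe right now.
 */
static long next_keepalive_delay(long now, long last_rx, long interval,
                                 int *send_probe)
{
        long expire = last_rx + interval;

        if (expire > now) {             /* traffic seen recently     */
                *send_probe = 0;
                return expire - now;    /* re-arm for the remainder  */
        }
        *send_probe = 1;                /* quiet too long: probe now */
        return interval;                /* full period until recheck */
}

int main(void)
{
        int probe;
        long delay = next_keepalive_delay(100, 95, 30, &probe);

        printf("sleep %ld, probe=%d\n", delay, probe);  /* sleep 25, probe=0 */
        return 0;
}
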
*/ -static int smc_rx_wait_data(struct smc_sock *smc, long *timeo) +int smc_rx_wait_data(struct smc_sock *smc, long *timeo) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct smc_connection *conn = &smc->conn; diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h index 3a32b59bf06c..0b75a6b470e6 100644 --- a/net/smc/smc_rx.h +++ b/net/smc/smc_rx.h @@ -20,5 +20,6 @@ void smc_rx_init(struct smc_sock *smc); int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, int flags); +int smc_rx_wait_data(struct smc_sock *smc, long *timeo); #endif /* SMC_RX_H */ diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 72f004c9c9b1..58dfe0bd9d60 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -19,6 +19,7 @@ #include <linux/sched/signal.h> #include <net/sock.h> +#include <net/tcp.h> #include "smc.h" #include "smc_wr.h" @@ -26,6 +27,7 @@ #include "smc_tx.h" #define SMC_TX_WORK_DELAY HZ +#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */ /***************************** sndbuf producer *******************************/ @@ -115,6 +117,13 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) return rc; } +static bool smc_tx_is_corked(struct smc_sock *smc) +{ + struct tcp_sock *tp = tcp_sk(smc->clcsock->sk); + + return (tp->nonagle & TCP_NAGLE_CORK) ? true : false; +} + /* sndbuf producer: main API called by socket layer. * called under sock lock. */ @@ -209,7 +218,16 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) /* since we just produced more new data into sndbuf, * trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ - smc_tx_sndbuf_nonempty(conn); + if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) && + (atomic_read(&conn->sndbuf_space) > + (conn->sndbuf_size >> 1))) + /* for a corked socket defer the RDMA writes if there + * is still sufficient sndbuf_space available + */ + schedule_delayed_work(&conn->tx_work, + SMC_TX_CORK_DELAY); + else + smc_tx_sndbuf_nonempty(conn); } /* while (msg_data_left(msg)) */ return send_done; @@ -409,8 +427,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) } rc = 0; if (conn->alert_token_local) /* connection healthy */ - schedule_delayed_work(&conn->tx_work, - SMC_TX_WORK_DELAY); + mod_delayed_work(system_wq, &conn->tx_work, + SMC_TX_WORK_DELAY); } goto out_unlock; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 1b8af23e6e2b..cc7c1bb60fe8 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -376,6 +376,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) for (i = 0; i < num; i++) { link = wc[i].qp->qp_context; if (wc[i].status == IB_WC_SUCCESS) { + link->wr_rx_tstamp = jiffies; smc_wr_rx_demultiplex(&wc[i]); smc_wr_rx_post(link); /* refill WR RX */ } else { diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index f7d47c89d658..2dfb492a7c94 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -697,6 +697,9 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window)) goto prop_msg_full; + if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu)) + goto prop_msg_full; nla_nest_end(msg->skb, prop); @@ -979,12 +982,23 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_TOL]) { b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - tipc_node_apply_tolerance(net, b); + tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL); } if (props[TIPC_NLA_PROP_PRIO]) b->priority = 
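
The smc_tx_sendmsg() change above mirrors TCP corking: when MSG_MORE is set or the CLC socket is corked, and more than half of the send buffer is still free, the RDMA write is deferred by up to SMC_TX_CORK_DELAY (250 ms) so that small writes can coalesce. The decision reduces to a small predicate; a sketch with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the deferral condition in smc_tx_sendmsg() (sketch). */
static bool defer_rdma_write(bool msg_more, bool corked,
                             int sndbuf_space, int sndbuf_size)
{
        return (msg_more || corked) && sndbuf_space > (sndbuf_size >> 1);
}

int main(void)
{
        /* 64 KiB sndbuf, 40 KiB free, socket corked: defer and coalesce. */
        printf("%d\n", defer_rdma_write(false, true, 40 << 10, 64 << 10));
        /* Less than half free: flush immediately despite the cork.      */
        printf("%d\n", defer_rdma_write(false, true, 20 << 10, 64 << 10));
        return 0;
}
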
nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (props[TIPC_NLA_PROP_MTU]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; +#ifdef CONFIG_TIPC_MEDIA_UDP + if (tipc_udp_mtu_bad(nla_get_u32 + (props[TIPC_NLA_PROP_MTU]))) + return -EINVAL; + b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); + tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); +#endif + } } return 0; @@ -1029,6 +1043,9 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg, goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window)) goto prop_msg_full; + if (media->type_id == TIPC_MEDIA_TYPE_UDP) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu)) + goto prop_msg_full; nla_nest_end(msg->skb, prop); nla_nest_end(msg->skb, attrs); @@ -1158,6 +1175,16 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (props[TIPC_NLA_PROP_MTU]) { + if (m->type_id != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; +#ifdef CONFIG_TIPC_MEDIA_UDP + if (tipc_udp_mtu_bad(nla_get_u32 + (props[TIPC_NLA_PROP_MTU]))) + return -EINVAL; + m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); +#endif + } } return 0; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6efcee63a381..394290cbbb1d 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -94,6 +94,8 @@ struct tipc_bearer; * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion + * @mtu: max packet size bearer can support for media type not dependent on + * underlying device MTU * @type_id: TIPC media identifier * @hwaddr_len: TIPC media address len * @name: media name @@ -118,6 +120,7 @@ struct tipc_media { u32 priority; u32 tolerance; u32 window; + u32 mtu; u32 type_id; u32 hwaddr_len; char name[TIPC_MAX_MEDIA_NAME]; diff --git a/net/tipc/node.c b/net/tipc/node.c index baaf93f12cbd..b0b840084f0a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -195,6 +195,27 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) return mtu; } +bool tipc_node_get_id(struct net *net, u32 addr, u8 *id) +{ + u8 *own_id = tipc_own_id(net); + struct tipc_node *n; + + if (!own_id) + return true; + + if (addr == tipc_own_addr(net)) { + memcpy(id, own_id, TIPC_NODEID_LEN); + return true; + } + n = tipc_node_find(net, addr); + if (!n) + return false; + + memcpy(id, &n->peer_id, TIPC_NODEID_LEN); + tipc_node_put(n); + return true; +} + u16 tipc_node_get_capabilities(struct net *net, u32 addr) { struct tipc_node *n; @@ -1681,7 +1702,8 @@ discard: kfree_skb(skb); } -void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b) +void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, + int prop) { struct tipc_net *tn = tipc_net(net); int bearer_id = b->identity; @@ -1696,8 +1718,13 @@ void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b) list_for_each_entry_rcu(n, &tn->node_list, list) { tipc_node_write_lock(n); e = &n->links[bearer_id]; - if (e->link) - tipc_link_set_tolerance(e->link, b->tolerance, &xmitq); + if (e->link) { + if (prop == TIPC_NLA_PROP_TOL) + tipc_link_set_tolerance(e->link, b->tolerance, + &xmitq); + else if (prop == TIPC_NLA_PROP_MTU) + tipc_link_set_mtu(e->link, b->mtu); + } tipc_node_write_unlock(n); tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr); 
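
tipc_node_get_id() above backs the SIOCGETNODEID ioctl added to tipc_ioctl() just below: userspace supplies the 32-bit hash address of a peer and receives the 128-bit node identity. A sketch of the call; the request layout and ioctl number follow the uapi header of this series, so treat both as assumptions:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/sockios.h>
#include <linux/tipc.h>

#ifndef SIOCGETNODEID
#define SIOCGETNODEID (SIOCPROTOPRIVATE + 1)    /* assumed, per this series */
struct tipc_sioc_nodeid_req {                   /* assumed uapi layout      */
        __u32 peer;
        char node_id[16];                       /* TIPC_NODEID_LEN          */
};
#endif

int main(void)
{
        struct tipc_sioc_nodeid_req nr;
        int fd = socket(AF_TIPC, SOCK_RDM, 0);
        int i;

        if (fd < 0)
                return 1;

        memset(&nr, 0, sizeof(nr));
        nr.peer = 0x01001001;   /* 32-bit hash address learned elsewhere */
        if (ioctl(fd, SIOCGETNODEID, &nr) == 0) {
                for (i = 0; i < 16; i++)
                        printf("%02x", (unsigned char)nr.node_id[i]);
                putchar('\n');
        }
        return 0;
}
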
} diff --git a/net/tipc/node.h b/net/tipc/node.h index f24b83500df1..846c8f240872 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -60,6 +60,7 @@ enum { #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); +bool tipc_node_get_id(struct net *net, u32 addr, u8 *id); u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr); void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128, struct tipc_bearer *bearer, @@ -67,7 +68,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); -void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b); +void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int prop); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 252a52ae0893..c4992002fd68 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2973,7 +2973,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct sock *sk = sock->sk; + struct net *net = sock_net(sock->sk); + struct tipc_sioc_nodeid_req nr = {0}; struct tipc_sioc_ln_req lnr; void __user *argp = (void __user *)arg; @@ -2981,7 +2982,7 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; - if (!tipc_node_get_linkname(sock_net(sk), + if (!tipc_node_get_linkname(net, lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) @@ -2989,6 +2990,14 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return 0; } return -EADDRNOTAVAIL; + case SIOCGETNODEID: + if (copy_from_user(&nr, argp, sizeof(nr))) + return -EFAULT; + if (!tipc_node_get_id(net, nr.peer, nr.node_id)) + return -EADDRNOTAVAIL; + if (copy_to_user(argp, &nr, sizeof(nr))) + return -EFAULT; + return 0; default: return -ENOIOCTLCMD; } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index e7d91f5d5cae..9783101bc4a9 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -713,8 +713,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, err = -EINVAL; goto err; } - b->mtu = dev->mtu - sizeof(struct iphdr) - - sizeof(struct udphdr); + b->mtu = b->media->mtu; #if IS_ENABLED(CONFIG_IPV6) } else if (local.proto == htons(ETH_P_IPV6)) { udp_conf.family = AF_INET6; @@ -803,6 +802,7 @@ struct tipc_media udp_media_info = { .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, + .mtu = TIPC_DEF_LINK_UDP_MTU, .type_id = TIPC_MEDIA_TYPE_UDP, .hwaddr_len = 0, .name = "udp" diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h index 281bbae87726..e7455cc73e16 100644 --- a/net/tipc/udp_media.h +++ b/net/tipc/udp_media.h @@ -38,9 +38,23 @@ #ifndef _TIPC_UDP_MEDIA_H #define _TIPC_UDP_MEDIA_H +#include <linux/ip.h> +#include <linux/udp.h> + int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b); int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb); +/* check if configured MTU is too low for tipc headers */ +static inline bool 
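
The udp_media.c hunk above decouples the bearer MTU from the device MTU: rather than deriving it as dev->mtu minus the IP and UDP headers, the bearer now starts from the configurable media MTU (TIPC_DEF_LINK_UDP_MTU), presumably so TIPC link messages can exceed the device MTU and rely on IP fragmentation. The tipc_udp_mtu_bad() helper whose body follows enforces the floor. A worked sketch of both calculations, with IPv4 figures and the kernel's minimum constant left as a parameter:

#include <stdbool.h>
#include <stdio.h>

#define IPV4_HDR_LEN 20
#define UDP_HDR_LEN   8

/* Userspace re-statement of tipc_udp_mtu_bad(); the kernel uses
 * TIPC_MIN_BEARER_MTU, whose value is left symbolic here.
 */
static bool udp_bearer_mtu_bad(unsigned int mtu, unsigned int min_bearer_mtu)
{
        return mtu < min_bearer_mtu + IPV4_HDR_LEN + UDP_HDR_LEN;
}

int main(void)
{
        /* Old derivation: a 1500-byte device leaves 1472 bytes of bearer MTU. */
        printf("derived bearer MTU: %d\n", 1500 - IPV4_HDR_LEN - UDP_HDR_LEN);

        /* New scheme: the media MTU is configured directly and only the
         * floor is enforced (1000 is an arbitrary example value).
         */
        printf("mtu 100 bad (floor 1000)? %d\n", udp_bearer_mtu_bad(100, 1000));
        return 0;
}
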
tipc_udp_mtu_bad(u32 mtu) +{ + if (mtu >= (TIPC_MIN_BEARER_MTU + sizeof(struct iphdr) + + sizeof(struct udphdr))) + return false; + + pr_warn("MTU too low for tipc bearer\n"); + return true; +} + #endif #endif diff --git a/net/tls/Kconfig b/net/tls/Kconfig index 89b8745a986f..73f05ece53d0 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -14,3 +14,13 @@ config TLS encryption handling of the TLS protocol to be done in-kernel. If unsure, say N. + +config TLS_DEVICE + bool "Transport Layer Security HW offload" + depends on TLS + select SOCK_VALIDATE_XMIT + default n + help + Enable kernel support for HW offload of the TLS protocol. + + If unsure, say N. diff --git a/net/tls/Makefile b/net/tls/Makefile index a930fd1c4f7b..4d6b728a67d0 100644 --- a/net/tls/Makefile +++ b/net/tls/Makefile @@ -5,3 +5,5 @@ obj-$(CONFIG_TLS) += tls.o tls-y := tls_main.o tls_sw.o + +tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c new file mode 100644 index 000000000000..ac45d6224e41 --- /dev/null +++ b/net/tls/tls_device.c @@ -0,0 +1,764 @@ +/* Copyright (c) 2018, Mellanox Technologies All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <crypto/aead.h> +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <net/dst.h> +#include <net/inet_connection_sock.h> +#include <net/tcp.h> +#include <net/tls.h> + +/* device_offload_lock is used to synchronize tls_dev_add + * against NETDEV_DOWN notifications. 
+ */ +static DECLARE_RWSEM(device_offload_lock); + +static void tls_device_gc_task(struct work_struct *work); + +static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task); +static LIST_HEAD(tls_device_gc_list); +static LIST_HEAD(tls_device_list); +static DEFINE_SPINLOCK(tls_device_lock); + +static void tls_device_free_ctx(struct tls_context *ctx) +{ + struct tls_offload_context *offload_ctx = tls_offload_ctx(ctx); + + kfree(offload_ctx); + kfree(ctx); +} + +static void tls_device_gc_task(struct work_struct *work) +{ + struct tls_context *ctx, *tmp; + unsigned long flags; + LIST_HEAD(gc_list); + + spin_lock_irqsave(&tls_device_lock, flags); + list_splice_init(&tls_device_gc_list, &gc_list); + spin_unlock_irqrestore(&tls_device_lock, flags); + + list_for_each_entry_safe(ctx, tmp, &gc_list, list) { + struct net_device *netdev = ctx->netdev; + + if (netdev) { + netdev->tlsdev_ops->tls_dev_del(netdev, ctx, + TLS_OFFLOAD_CTX_DIR_TX); + dev_put(netdev); + } + + list_del(&ctx->list); + tls_device_free_ctx(ctx); + } +} + +static void tls_device_queue_ctx_destruction(struct tls_context *ctx) +{ + unsigned long flags; + + spin_lock_irqsave(&tls_device_lock, flags); + list_move_tail(&ctx->list, &tls_device_gc_list); + + /* schedule_work inside the spinlock + * to make sure tls_device_down waits for that work. + */ + schedule_work(&tls_device_gc_work); + + spin_unlock_irqrestore(&tls_device_lock, flags); +} + +/* We assume that the socket is already connected */ +static struct net_device *get_netdev_for_sock(struct sock *sk) +{ + struct dst_entry *dst = sk_dst_get(sk); + struct net_device *netdev = NULL; + + if (likely(dst)) { + netdev = dst->dev; + dev_hold(netdev); + } + + dst_release(dst); + + return netdev; +} + +static void destroy_record(struct tls_record_info *record) +{ + int nr_frags = record->num_frags; + skb_frag_t *frag; + + while (nr_frags-- > 0) { + frag = &record->frags[nr_frags]; + __skb_frag_unref(frag); + } + kfree(record); +} + +static void delete_all_records(struct tls_offload_context *offload_ctx) +{ + struct tls_record_info *info, *temp; + + list_for_each_entry_safe(info, temp, &offload_ctx->records_list, list) { + list_del(&info->list); + destroy_record(info); + } + + offload_ctx->retransmit_hint = NULL; +} + +static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_record_info *info, *temp; + struct tls_offload_context *ctx; + u64 deleted_records = 0; + unsigned long flags; + + if (!tls_ctx) + return; + + ctx = tls_offload_ctx(tls_ctx); + + spin_lock_irqsave(&ctx->lock, flags); + info = ctx->retransmit_hint; + if (info && !before(acked_seq, info->end_seq)) { + ctx->retransmit_hint = NULL; + list_del(&info->list); + destroy_record(info); + deleted_records++; + } + + list_for_each_entry_safe(info, temp, &ctx->records_list, list) { + if (before(acked_seq, info->end_seq)) + break; + list_del(&info->list); + + destroy_record(info); + deleted_records++; + } + + ctx->unacked_record_sn += deleted_records; + spin_unlock_irqrestore(&ctx->lock, flags); +} + +/* At this point, there should be no references on this + * socket and no in-flight SKBs associated with this + * socket, so it is safe to free all the resources. 
+ */ +void tls_device_sk_destruct(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx); + + if (ctx->open_record) + destroy_record(ctx->open_record); + + delete_all_records(ctx); + crypto_free_aead(ctx->aead_send); + ctx->sk_destruct(sk); + clean_acked_data_disable(inet_csk(sk)); + + if (refcount_dec_and_test(&tls_ctx->refcount)) + tls_device_queue_ctx_destruction(tls_ctx); +} +EXPORT_SYMBOL(tls_device_sk_destruct); + +static void tls_append_frag(struct tls_record_info *record, + struct page_frag *pfrag, + int size) +{ + skb_frag_t *frag; + + frag = &record->frags[record->num_frags - 1]; + if (frag->page.p == pfrag->page && + frag->page_offset + frag->size == pfrag->offset) { + frag->size += size; + } else { + ++frag; + frag->page.p = pfrag->page; + frag->page_offset = pfrag->offset; + frag->size = size; + ++record->num_frags; + get_page(pfrag->page); + } + + pfrag->offset += size; + record->len += size; +} + +static int tls_push_record(struct sock *sk, + struct tls_context *ctx, + struct tls_offload_context *offload_ctx, + struct tls_record_info *record, + struct page_frag *pfrag, + int flags, + unsigned char record_type) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct page_frag dummy_tag_frag; + skb_frag_t *frag; + int i; + + /* fill prepend */ + frag = &record->frags[0]; + tls_fill_prepend(ctx, + skb_frag_address(frag), + record->len - ctx->tx.prepend_size, + record_type); + + /* HW doesn't care about the data in the tag, because it fills it. */ + dummy_tag_frag.page = skb_frag_page(frag); + dummy_tag_frag.offset = 0; + + tls_append_frag(record, &dummy_tag_frag, ctx->tx.tag_size); + record->end_seq = tp->write_seq + record->len; + spin_lock_irq(&offload_ctx->lock); + list_add_tail(&record->list, &offload_ctx->records_list); + spin_unlock_irq(&offload_ctx->lock); + offload_ctx->open_record = NULL; + set_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); + tls_advance_record_sn(sk, &ctx->tx); + + for (i = 0; i < record->num_frags; i++) { + frag = &record->frags[i]; + sg_unmark_end(&offload_ctx->sg_tx_data[i]); + sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag), + frag->size, frag->page_offset); + sk_mem_charge(sk, frag->size); + get_page(skb_frag_page(frag)); + } + sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]); + + /* all ready, send */ + return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags); +} + +static int tls_create_new_record(struct tls_offload_context *offload_ctx, + struct page_frag *pfrag, + size_t prepend_size) +{ + struct tls_record_info *record; + skb_frag_t *frag; + + record = kmalloc(sizeof(*record), GFP_KERNEL); + if (!record) + return -ENOMEM; + + frag = &record->frags[0]; + __skb_frag_set_page(frag, pfrag->page); + frag->page_offset = pfrag->offset; + skb_frag_size_set(frag, prepend_size); + + get_page(pfrag->page); + pfrag->offset += prepend_size; + + record->num_frags = 1; + record->len = prepend_size; + offload_ctx->open_record = record; + return 0; +} + +static int tls_do_allocation(struct sock *sk, + struct tls_offload_context *offload_ctx, + struct page_frag *pfrag, + size_t prepend_size) +{ + int ret; + + if (!offload_ctx->open_record) { + if (unlikely(!skb_page_frag_refill(prepend_size, pfrag, + sk->sk_allocation))) { + sk->sk_prot->enter_memory_pressure(sk); + sk_stream_moderate_sndbuf(sk); + return -ENOMEM; + } + + ret = tls_create_new_record(offload_ctx, pfrag, prepend_size); + if (ret) + return ret; + + if (pfrag->size > pfrag->offset) + return 0; + } + + 
if (!sk_page_frag_refill(sk, pfrag)) + return -ENOMEM; + + return 0; +} + +static int tls_push_data(struct sock *sk, + struct iov_iter *msg_iter, + size_t size, int flags, + unsigned char record_type) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx); + int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE); + struct tls_record_info *record = ctx->open_record; + struct page_frag *pfrag; + size_t orig_size = size; + u32 max_open_record_len; + int copy, rc = 0; + bool done = false; + long timeo; + + if (flags & + ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST)) + return -ENOTSUPP; + + if (sk->sk_err) + return -sk->sk_err; + + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + rc = tls_complete_pending_work(sk, tls_ctx, flags, &timeo); + if (rc < 0) + return rc; + + pfrag = sk_page_frag(sk); + + /* TLS_HEADER_SIZE is not counted as part of the TLS record, and + * we need to leave room for an authentication tag. + */ + max_open_record_len = TLS_MAX_PAYLOAD_SIZE + + tls_ctx->tx.prepend_size; + do { + rc = tls_do_allocation(sk, ctx, pfrag, + tls_ctx->tx.prepend_size); + if (rc) { + rc = sk_stream_wait_memory(sk, &timeo); + if (!rc) + continue; + + record = ctx->open_record; + if (!record) + break; +handle_error: + if (record_type != TLS_RECORD_TYPE_DATA) { + /* avoid sending partial + * record with type != + * application_data + */ + size = orig_size; + destroy_record(record); + ctx->open_record = NULL; + } else if (record->len > tls_ctx->tx.prepend_size) { + goto last_record; + } + + break; + } + + record = ctx->open_record; + copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); + copy = min_t(size_t, copy, (max_open_record_len - record->len)); + + if (copy_from_iter_nocache(page_address(pfrag->page) + + pfrag->offset, + copy, msg_iter) != copy) { + rc = -EFAULT; + goto handle_error; + } + tls_append_frag(record, pfrag, copy); + + size -= copy; + if (!size) { +last_record: + tls_push_record_flags = flags; + if (more) { + tls_ctx->pending_open_record_frags = + record->num_frags; + break; + } + + done = true; + } + + if (done || record->len >= max_open_record_len || + (record->num_frags >= MAX_SKB_FRAGS - 1)) { + rc = tls_push_record(sk, + tls_ctx, + ctx, + record, + pfrag, + tls_push_record_flags, + record_type); + if (rc < 0) + break; + } + } while (!done); + + if (orig_size - size > 0) + rc = orig_size - size; + + return rc; +} + +int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + unsigned char record_type = TLS_RECORD_TYPE_DATA; + int rc; + + lock_sock(sk); + + if (unlikely(msg->msg_controllen)) { + rc = tls_proccess_cmsg(sk, msg, &record_type); + if (rc) + goto out; + } + + rc = tls_push_data(sk, &msg->msg_iter, size, + msg->msg_flags, record_type); + +out: + release_sock(sk); + return rc; +} + +int tls_device_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + struct iov_iter msg_iter; + char *kaddr = kmap(page); + struct kvec iov; + int rc; + + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + + lock_sock(sk); + + if (flags & MSG_OOB) { + rc = -ENOTSUPP; + goto out; + } + + iov.iov_base = kaddr + offset; + iov.iov_len = size; + iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + rc = tls_push_data(sk, &msg_iter, size, + flags, TLS_RECORD_TYPE_DATA); + kunmap(page); + +out: + release_sock(sk); + return rc; +} + +struct tls_record_info *tls_get_record(struct 
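
On the sizing logic in tls_push_data() above: with AES-GCM-128, the prepend is the 5-byte TLS record header plus the 8-byte explicit nonce, and the 16-byte tag is appended only when tls_push_record() closes the record, so an open record may grow to TLS_MAX_PAYLOAD_SIZE plus 13 bytes. A worked example of the arithmetic:

#include <stdio.h>

#define TLS_HDR_LEN        5            /* type + version + length    */
#define GCM128_NONCE_LEN   8            /* explicit per-record nonce  */
#define GCM128_TAG_LEN     16           /* authentication tag         */
#define TLS_MAX_PAYLOAD    (1 << 14)    /* 16384-byte TLS plaintext   */

int main(void)
{
        int prepend  = TLS_HDR_LEN + GCM128_NONCE_LEN;  /* 13 */
        int overhead = prepend + GCM128_TAG_LEN;        /* 29 */

        printf("max open record (payload + prepend): %d\n",
               TLS_MAX_PAYLOAD + prepend);              /* 16397 */
        printf("wire size of a full record:          %d\n",
               TLS_MAX_PAYLOAD + overhead);             /* 16413 */
        return 0;
}
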
tls_offload_context *context, + u32 seq, u64 *p_record_sn) +{ + u64 record_sn = context->hint_record_sn; + struct tls_record_info *info; + + info = context->retransmit_hint; + if (!info || + before(seq, info->end_seq - info->len)) { + /* if retransmit_hint is irrelevant start + * from the beginning of the list + */ + info = list_first_entry(&context->records_list, + struct tls_record_info, list); + record_sn = context->unacked_record_sn; + } + + list_for_each_entry_from(info, &context->records_list, list) { + if (before(seq, info->end_seq)) { + if (!context->retransmit_hint || + after(info->end_seq, + context->retransmit_hint->end_seq)) { + context->hint_record_sn = record_sn; + context->retransmit_hint = info; + } + *p_record_sn = record_sn; + return info; + } + record_sn++; + } + + return NULL; +} +EXPORT_SYMBOL(tls_get_record); + +static int tls_device_push_pending_record(struct sock *sk, int flags) +{ + struct iov_iter msg_iter; + + iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0); + return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA); +} + +int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +{ + u16 nonce_size, tag_size, iv_size, rec_seq_size; + struct tls_record_info *start_marker_record; + struct tls_offload_context *offload_ctx; + struct tls_crypto_info *crypto_info; + struct net_device *netdev; + char *iv, *rec_seq; + struct sk_buff *skb; + int rc = -EINVAL; + __be64 rcd_sn; + + if (!ctx) + goto out; + + if (ctx->priv_ctx_tx) { + rc = -EEXIST; + goto out; + } + + start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); + if (!start_marker_record) { + rc = -ENOMEM; + goto out; + } + + offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE, GFP_KERNEL); + if (!offload_ctx) { + rc = -ENOMEM; + goto free_marker_record; + } + + crypto_info = &ctx->crypto_send; + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; + iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv; + rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE; + rec_seq = + ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq; + break; + default: + rc = -EINVAL; + goto free_offload_ctx; + } + + ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size; + ctx->tx.tag_size = tag_size; + ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size; + ctx->tx.iv_size = iv_size; + ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + GFP_KERNEL); + if (!ctx->tx.iv) { + rc = -ENOMEM; + goto free_offload_ctx; + } + + memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); + + ctx->tx.rec_seq_size = rec_seq_size; + ctx->tx.rec_seq = kmalloc(rec_seq_size, GFP_KERNEL); + if (!ctx->tx.rec_seq) { + rc = -ENOMEM; + goto free_iv; + } + memcpy(ctx->tx.rec_seq, rec_seq, rec_seq_size); + + rc = tls_sw_fallback_init(sk, offload_ctx, crypto_info); + if (rc) + goto free_rec_seq; + + /* start at rec_seq - 1 to account for the start marker record */ + memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); + offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; + + start_marker_record->end_seq = tcp_sk(sk)->write_seq; + start_marker_record->len = 0; + start_marker_record->num_frags = 0; + + INIT_LIST_HEAD(&offload_ctx->records_list); + list_add_tail(&start_marker_record->list, &offload_ctx->records_list); + spin_lock_init(&offload_ctx->lock); + + clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); +
ctx->push_pending_record = tls_device_push_pending_record; + offload_ctx->sk_destruct = sk->sk_destruct; + + /* TLS offload is greatly simplified if we don't send + * SKBs where only part of the payload needs to be encrypted. + * So mark the last skb in the write queue as end of record. + */ + skb = tcp_write_queue_tail(sk); + if (skb) + TCP_SKB_CB(skb)->eor = 1; + + refcount_set(&ctx->refcount, 1); + + /* We support starting offload on multiple sockets + * concurrently, so we only need a read lock here. + * This lock must precede get_netdev_for_sock to prevent races between + * NETDEV_DOWN and setsockopt. + */ + down_read(&device_offload_lock); + netdev = get_netdev_for_sock(sk); + if (!netdev) { + pr_err_ratelimited("%s: netdev not found\n", __func__); + rc = -EINVAL; + goto release_lock; + } + + if (!(netdev->features & NETIF_F_HW_TLS_TX)) { + rc = -ENOTSUPP; + goto release_netdev; + } + + /* Avoid offloading if the device is down + * We don't want to offload new flows after + * the NETDEV_DOWN event + */ + if (!(netdev->flags & IFF_UP)) { + rc = -EINVAL; + goto release_netdev; + } + + ctx->priv_ctx_tx = offload_ctx; + rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX, + &ctx->crypto_send, + tcp_sk(sk)->write_seq); + if (rc) + goto release_netdev; + + ctx->netdev = netdev; + + spin_lock_irq(&tls_device_lock); + list_add_tail(&ctx->list, &tls_device_list); + spin_unlock_irq(&tls_device_lock); + + sk->sk_validate_xmit_skb = tls_validate_xmit_skb; + /* following this assignment tls_is_sk_tx_device_offloaded + * will return true and the context might be accessed + * by the netdev's xmit function. + */ + smp_store_release(&sk->sk_destruct, + &tls_device_sk_destruct); + up_read(&device_offload_lock); + goto out; + +release_netdev: + dev_put(netdev); +release_lock: + up_read(&device_offload_lock); + clean_acked_data_disable(inet_csk(sk)); + crypto_free_aead(offload_ctx->aead_send); +free_rec_seq: + kfree(ctx->tx.rec_seq); +free_iv: + kfree(ctx->tx.iv); +free_offload_ctx: + kfree(offload_ctx); + ctx->priv_ctx_tx = NULL; +free_marker_record: + kfree(start_marker_record); +out: + return rc; +} + +static int tls_device_down(struct net_device *netdev) +{ + struct tls_context *ctx, *tmp; + unsigned long flags; + LIST_HEAD(list); + + /* Request a write lock to block new offload attempts */ + down_write(&device_offload_lock); + + spin_lock_irqsave(&tls_device_lock, flags); + list_for_each_entry_safe(ctx, tmp, &tls_device_list, list) { + if (ctx->netdev != netdev || + !refcount_inc_not_zero(&ctx->refcount)) + continue; + + list_move(&ctx->list, &list); + } + spin_unlock_irqrestore(&tls_device_lock, flags); + + list_for_each_entry_safe(ctx, tmp, &list, list) { + netdev->tlsdev_ops->tls_dev_del(netdev, ctx, + TLS_OFFLOAD_CTX_DIR_TX); + ctx->netdev = NULL; + dev_put(netdev); + list_del_init(&ctx->list); + + if (refcount_dec_and_test(&ctx->refcount)) + tls_device_free_ctx(ctx); + } + + up_write(&device_offload_lock); + + flush_work(&tls_device_gc_work); + + return NOTIFY_DONE; +} + +static int tls_dev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (!(dev->features & NETIF_F_HW_TLS_TX)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + case NETDEV_FEAT_CHANGE: + if (dev->tlsdev_ops && + dev->tlsdev_ops->tls_dev_add && + dev->tlsdev_ops->tls_dev_del) + return NOTIFY_DONE; + else + return NOTIFY_BAD; + case NETDEV_DOWN: + return tls_device_down(dev); + } + return NOTIFY_DONE; 
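
tls_dev_event() above defines the driver-facing contract: a device advertising NETIF_F_HW_TLS_TX must provide both tlsdev_ops callbacks, or registration is vetoed with NOTIFY_BAD. A sketch of the minimum a driver wires up (hypothetical driver functions in kernel context; the signatures are inferred from the call sites in this patch, so treat them as assumptions):

/* Driver-side sketch (assumes <net/tls.h>); not a complete driver. */
static int my_tls_dev_add(struct net_device *netdev, struct sock *sk,
                          enum tls_offload_ctx_dir direction,
                          struct tls_crypto_info *crypto_info,
                          u32 start_offload_tcp_sn)
{
        /* Program the connection's TX crypto state into the NIC here. */
        return 0;
}

static void my_tls_dev_del(struct net_device *netdev,
                           struct tls_context *tls_ctx,
                           enum tls_offload_ctx_dir direction)
{
        /* Tear down the per-connection hardware state here. */
}

static const struct tlsdev_ops my_tlsdev_ops = {
        .tls_dev_add = my_tls_dev_add,
        .tls_dev_del = my_tls_dev_del,
};

/* In the probe path:
 *
 *      netdev->tlsdev_ops = &my_tlsdev_ops;
 *      netdev->features  |= NETIF_F_HW_TLS_TX;
 *
 * Omitting either callback makes tls_dev_event() veto NETDEV_REGISTER
 * with NOTIFY_BAD.
 */
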
+} + +static struct notifier_block tls_dev_notifier = { + .notifier_call = tls_dev_event, +}; + +void __init tls_device_init(void) +{ + register_netdevice_notifier(&tls_dev_notifier); +} + +void __exit tls_device_cleanup(void) +{ + unregister_netdevice_notifier(&tls_dev_notifier); + flush_work(&tls_device_gc_work); +} diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c new file mode 100644 index 000000000000..748914abdb60 --- /dev/null +++ b/net/tls/tls_device_fallback.c @@ -0,0 +1,450 @@ +/* Copyright (c) 2018, Mellanox Technologies All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <net/tls.h> +#include <crypto/aead.h> +#include <crypto/scatterwalk.h> +#include <net/ip6_checksum.h> + +static void chain_to_walk(struct scatterlist *sg, struct scatter_walk *walk) +{ + struct scatterlist *src = walk->sg; + int diff = walk->offset - src->offset; + + sg_set_page(sg, sg_page(src), + src->length - diff, walk->offset); + + scatterwalk_crypto_chain(sg, sg_next(src), 0, 2); +} + +static int tls_enc_record(struct aead_request *aead_req, + struct crypto_aead *aead, char *aad, + char *iv, __be64 rcd_sn, + struct scatter_walk *in, + struct scatter_walk *out, int *in_len) +{ + unsigned char buf[TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE]; + struct scatterlist sg_in[3]; + struct scatterlist sg_out[3]; + u16 len; + int rc; + + len = min_t(int, *in_len, ARRAY_SIZE(buf)); + + scatterwalk_copychunks(buf, in, len, 0); + scatterwalk_copychunks(buf, out, len, 1); + + *in_len -= len; + if (!*in_len) + return 0; + + scatterwalk_pagedone(in, 0, 1); + scatterwalk_pagedone(out, 1, 1); + + len = buf[4] | (buf[3] << 8); + len -= TLS_CIPHER_AES_GCM_128_IV_SIZE; + + tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE, + (char *)&rcd_sn, sizeof(rcd_sn), buf[0]); + + memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE, + TLS_CIPHER_AES_GCM_128_IV_SIZE); + + sg_init_table(sg_in, ARRAY_SIZE(sg_in)); + sg_init_table(sg_out, ARRAY_SIZE(sg_out)); + sg_set_buf(sg_in, aad, TLS_AAD_SPACE_SIZE); + sg_set_buf(sg_out, aad, TLS_AAD_SPACE_SIZE); + chain_to_walk(sg_in + 1, in); + chain_to_walk(sg_out + 1, out); + + *in_len -= len; + if (*in_len < 0) { + *in_len += TLS_CIPHER_AES_GCM_128_TAG_SIZE; + /* the input buffer doesn't contain the entire record. + * trim len accordingly. The resulting authentication tag + * will contain garbage, but we don't care, so we won't + * include any of it in the output skb + * Note that we assume the output buffer length + * is larger than input buffer length + tag size + */ + if (*in_len < 0) + len += *in_len; + + *in_len = 0; + } + + if (*in_len) { + scatterwalk_copychunks(NULL, in, len, 2); + scatterwalk_pagedone(in, 0, 1); + scatterwalk_copychunks(NULL, out, len, 2); + scatterwalk_pagedone(out, 1, 1); + } + + len -= TLS_CIPHER_AES_GCM_128_TAG_SIZE; + aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv); + + rc = crypto_aead_encrypt(aead_req); + + return rc; +} + +static void tls_init_aead_request(struct aead_request *aead_req, + struct crypto_aead *aead) +{ + aead_request_set_tfm(aead_req, aead); + aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); +} + +static struct aead_request *tls_alloc_aead_request(struct crypto_aead *aead, + gfp_t flags) +{ + unsigned int req_size = sizeof(struct aead_request) + + crypto_aead_reqsize(aead); + struct aead_request *aead_req; + + aead_req = kzalloc(req_size, flags); + if (aead_req) + tls_init_aead_request(aead_req, aead); + return aead_req; +} + +static int tls_enc_records(struct aead_request *aead_req, + struct crypto_aead *aead, struct scatterlist *sg_in, + struct scatterlist *sg_out, char *aad, char *iv, + u64 rcd_sn, int len) +{ + struct scatter_walk out, in; + int rc; + + scatterwalk_start(&in, sg_in); + scatterwalk_start(&out, sg_out); + + do { + rc = tls_enc_record(aead_req, aead, aad, iv, + cpu_to_be64(rcd_sn), &in, &out, &len); + rcd_sn++; + + } while (rc == 0 && len); + + scatterwalk_done(&in, 0, 0); + scatterwalk_done(&out, 1, 0); + + return rc; +} + +/* Can't use icsk->icsk_af_ops->send_check here because the ip addresses + * might have been changed by NAT. + */
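
tls_enc_record() above recovers the record length from the 5-byte header it just copied (bytes 3 and 4 hold the big-endian length field) and then discounts the 8-byte explicit nonce that GCM carries inside the fragment. A standalone parser for the same header layout:

#include <stdint.h>
#include <stdio.h>

/* TLS 1.2 record header: type(1), version(2), length(2, big endian). */
struct tls_rec_hdr {
        uint8_t  type;
        uint16_t version;
        uint16_t length;        /* bytes that follow the 5-byte header */
};

static struct tls_rec_hdr parse_tls_header(const uint8_t buf[5])
{
        struct tls_rec_hdr h;

        h.type    = buf[0];
        h.version = (uint16_t)((buf[1] << 8) | buf[2]);
        h.length  = (uint16_t)((buf[3] << 8) | buf[4]); /* as in tls_enc_record() */
        return h;
}

int main(void)
{
        /* application_data (23), TLS 1.2 (0x0303), 40-byte fragment */
        const uint8_t hdr[5] = { 23, 0x03, 0x03, 0x00, 0x28 };
        struct tls_rec_hdr h = parse_tls_header(hdr);

        printf("type %u, version 0x%04x, length %u\n",
               h.type, h.version, h.length);
        return 0;
}
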
+static void update_chksum(struct sk_buff *skb, int headln) +{ + struct tcphdr *th = tcp_hdr(skb); + int datalen = skb->len - headln; + const struct ipv6hdr *ipv6h; + const struct iphdr *iph; + + /* We only changed the payload so if we are using partial we don't + * need to update anything. + */ + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) + return; + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + + if (skb->sk->sk_family == AF_INET6) { + ipv6h = ipv6_hdr(skb); + th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, IPPROTO_TCP, 0); + } else { + iph = ip_hdr(skb); + th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, + IPPROTO_TCP, 0); + } +} + +static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) +{ + skb_copy_header(nskb, skb); + + skb_put(nskb, skb->len); + memcpy(nskb->data, skb->data, headln); + update_chksum(nskb, headln); + + nskb->destructor = skb->destructor; + nskb->sk = skb->sk; + skb->destructor = NULL; + skb->sk = NULL; + refcount_add(nskb->truesize - skb->truesize, + &nskb->sk->sk_wmem_alloc); +} + +/* This function may be called after the user socket is already + * closed so make sure we don't use anything freed during + * tls_sk_proto_close here + */ + +static int fill_sg_in(struct scatterlist *sg_in, + struct sk_buff *skb, + struct tls_offload_context *ctx, + u64 *rcd_sn, + s32 *sync_size, + int *resync_sgs) +{ + int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + int payload_len = skb->len - tcp_payload_offset; + u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); + struct tls_record_info *record; + unsigned long flags; + int remaining; + int i; + + spin_lock_irqsave(&ctx->lock, flags); + record = tls_get_record(ctx, tcp_seq, rcd_sn); + if (!record) { + spin_unlock_irqrestore(&ctx->lock, flags); + WARN(1, "Record not found for seq %u\n", tcp_seq); + return -EINVAL; + } + + *sync_size = tcp_seq - tls_record_start_seq(record); + if (*sync_size < 0) { + int is_start_marker = tls_record_is_start_marker(record); + + spin_unlock_irqrestore(&ctx->lock, flags); + /* This should only occur if the relevant record was + * already acked. In that case it should be ok + * to drop the packet and avoid retransmission. + * + * There is a corner case where the packet contains + * both an acked and a non-acked record. + * We currently don't handle that case and rely + * on TCP to retransmit a packet that doesn't contain + * already acked payload.
+ */ + if (!is_start_marker) + *sync_size = 0; + return -EINVAL; + } + + remaining = *sync_size; + for (i = 0; remaining > 0; i++) { + skb_frag_t *frag = &record->frags[i]; + + __skb_frag_ref(frag); + sg_set_page(sg_in + i, skb_frag_page(frag), + skb_frag_size(frag), frag->page_offset); + + remaining -= skb_frag_size(frag); + + if (remaining < 0) + sg_in[i].length += remaining; + } + *resync_sgs = i; + + spin_unlock_irqrestore(&ctx->lock, flags); + if (skb_to_sgvec(skb, &sg_in[i], tcp_payload_offset, payload_len) < 0) + return -EINVAL; + + return 0; +} + +static void fill_sg_out(struct scatterlist sg_out[3], void *buf, + struct tls_context *tls_ctx, + struct sk_buff *nskb, + int tcp_payload_offset, + int payload_len, + int sync_size, + void *dummy_buf) +{ + sg_set_buf(&sg_out[0], dummy_buf, sync_size); + sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len); + /* Add room for authentication tag produced by crypto */ + dummy_buf += sync_size; + sg_set_buf(&sg_out[2], dummy_buf, TLS_CIPHER_AES_GCM_128_TAG_SIZE); +} + +static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, + struct scatterlist sg_out[3], + struct scatterlist *sg_in, + struct sk_buff *skb, + s32 sync_size, u64 rcd_sn) +{ + int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx); + int payload_len = skb->len - tcp_payload_offset; + void *buf, *iv, *aad, *dummy_buf; + struct aead_request *aead_req; + struct sk_buff *nskb = NULL; + int buf_len; + + aead_req = tls_alloc_aead_request(ctx->aead_send, GFP_ATOMIC); + if (!aead_req) + return NULL; + + buf_len = TLS_CIPHER_AES_GCM_128_SALT_SIZE + + TLS_CIPHER_AES_GCM_128_IV_SIZE + + TLS_AAD_SPACE_SIZE + + sync_size + + TLS_CIPHER_AES_GCM_128_TAG_SIZE; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (!buf) + goto free_req; + + iv = buf; + memcpy(iv, tls_ctx->crypto_send_aes_gcm_128.salt, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + aad = buf + TLS_CIPHER_AES_GCM_128_SALT_SIZE + + TLS_CIPHER_AES_GCM_128_IV_SIZE; + dummy_buf = aad + TLS_AAD_SPACE_SIZE; + + nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC); + if (!nskb) + goto free_buf; + + skb_reserve(nskb, skb_headroom(skb)); + + fill_sg_out(sg_out, buf, tls_ctx, nskb, tcp_payload_offset, + payload_len, sync_size, dummy_buf); + + if (tls_enc_records(aead_req, ctx->aead_send, sg_in, sg_out, aad, iv, + rcd_sn, sync_size + payload_len) < 0) + goto free_nskb; + + complete_skb(nskb, skb, tcp_payload_offset); + + /* validate_xmit_skb_list assumes that if the skb wasn't segmented + * nskb->prev will point to the skb itself + */ + nskb->prev = nskb; + +free_buf: + kfree(buf); +free_req: + kfree(aead_req); + return nskb; +free_nskb: + kfree_skb(nskb); + nskb = NULL; + goto free_buf; +} + +static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb) +{ + int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx); + int payload_len = skb->len - tcp_payload_offset; + struct scatterlist *sg_in, sg_out[3]; + struct sk_buff *nskb = NULL; + int sg_in_max_elements; + int resync_sgs = 0; + s32 sync_size = 0; + u64 rcd_sn; + + /* worst case is: + * MAX_SKB_FRAGS in tls_record_info + * MAX_SKB_FRAGS + 1 in SKB head and frags. 
+ */ + sg_in_max_elements = 2 * MAX_SKB_FRAGS + 1; + + if (!payload_len) + return skb; + + sg_in = kmalloc_array(sg_in_max_elements, sizeof(*sg_in), GFP_ATOMIC); + if (!sg_in) + goto free_orig; + + sg_init_table(sg_in, sg_in_max_elements); + sg_init_table(sg_out, ARRAY_SIZE(sg_out)); + + if (fill_sg_in(sg_in, skb, ctx, &rcd_sn, &sync_size, &resync_sgs)) { + /* bypass packets before kernel TLS socket option was set */ + if (sync_size < 0 && payload_len <= -sync_size) + nskb = skb_get(skb); + goto put_sg; + } + + nskb = tls_enc_skb(tls_ctx, sg_out, sg_in, skb, sync_size, rcd_sn); + +put_sg: + while (resync_sgs) + put_page(sg_page(&sg_in[--resync_sgs])); + kfree(sg_in); +free_orig: + kfree_skb(skb); + return nskb; +} + +struct sk_buff *tls_validate_xmit_skb(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb) +{ + if (dev == tls_get_ctx(sk)->netdev) + return skb; + + return tls_sw_fallback(sk, skb); +} + +int tls_sw_fallback_init(struct sock *sk, + struct tls_offload_context *offload_ctx, + struct tls_crypto_info *crypto_info) +{ + const u8 *key; + int rc; + + offload_ctx->aead_send = + crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(offload_ctx->aead_send)) { + rc = PTR_ERR(offload_ctx->aead_send); + pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc); + offload_ctx->aead_send = NULL; + goto err_out; + } + + key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key; + + rc = crypto_aead_setkey(offload_ctx->aead_send, key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + if (rc) + goto free_aead; + + rc = crypto_aead_setauthsize(offload_ctx->aead_send, + TLS_CIPHER_AES_GCM_128_TAG_SIZE); + if (rc) + goto free_aead; + + return 0; +free_aead: + crypto_free_aead(offload_ctx->aead_send); +err_out: + return rc; +} diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index cc03e00785c7..4b57ddd72f34 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -51,12 +51,12 @@ enum { TLSV6, TLS_NUM_PROTS, }; - enum { TLS_BASE, - TLS_SW_TX, - TLS_SW_RX, - TLS_SW_RXTX, + TLS_SW, +#ifdef CONFIG_TLS_DEVICE + TLS_HW, +#endif TLS_HW_RECORD, TLS_NUM_CONFIG, }; @@ -65,14 +65,14 @@ static struct proto *saved_tcpv6_prot; static DEFINE_MUTEX(tcpv6_prot_mutex); static LIST_HEAD(device_list); static DEFINE_MUTEX(device_mutex); -static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG]; +static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; static struct proto_ops tls_sw_proto_ops; -static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) +static void update_sk_prot(struct sock *sk, struct tls_context *ctx) { int ip_ver = sk->sk_family == AF_INET6 ? 
TLSV6 : TLSV4; - sk->sk_prot = &tls_prots[ip_ver][ctx->conf]; + sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]; } int wait_on_pending_writer(struct sock *sk, long *timeo) @@ -252,10 +252,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) lock_sock(sk); sk_proto_close = ctx->sk_proto_close; - if (ctx->conf == TLS_HW_RECORD) + if (ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD) goto skip_tx_cleanup; - if (ctx->conf == TLS_BASE) { + if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) { kfree(ctx); ctx = NULL; goto skip_tx_cleanup; @@ -277,15 +277,26 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) } } - kfree(ctx->tx.rec_seq); - kfree(ctx->tx.iv); - kfree(ctx->rx.rec_seq); - kfree(ctx->rx.iv); + /* We need these for tls_sw_fallback handling of other packets */ + if (ctx->tx_conf == TLS_SW) { + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + tls_sw_free_resources_tx(sk); + } - if (ctx->conf == TLS_SW_TX || - ctx->conf == TLS_SW_RX || - ctx->conf == TLS_SW_RXTX) { - tls_sw_free_resources(sk); + if (ctx->rx_conf == TLS_SW) { + kfree(ctx->rx.rec_seq); + kfree(ctx->rx.iv); + tls_sw_free_resources_rx(sk); + } + +#ifdef CONFIG_TLS_DEVICE + if (ctx->tx_conf != TLS_HW) { +#else + { +#endif + kfree(ctx); + ctx = NULL; } skip_tx_cleanup: @@ -294,7 +305,8 @@ skip_tx_cleanup: /* free ctx for TLS_HW_RECORD, used by tcp_set_state * for sk->sk_prot->unhash [tls_hw_unhash] */ - if (ctx && ctx->conf == TLS_HW_RECORD) + if (ctx && ctx->tx_conf == TLS_HW_RECORD && + ctx->rx_conf == TLS_HW_RECORD) kfree(ctx); } @@ -448,25 +460,29 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, goto err_crypto_info; } - /* currently SW is default, we will have ethtool in future */ if (tx) { - rc = tls_set_sw_offload(sk, ctx, 1); - if (ctx->conf == TLS_SW_RX) - conf = TLS_SW_RXTX; - else - conf = TLS_SW_TX; +#ifdef CONFIG_TLS_DEVICE + rc = tls_set_device_offload(sk, ctx); + conf = TLS_HW; + if (rc) { +#else + { +#endif + rc = tls_set_sw_offload(sk, ctx, 1); + conf = TLS_SW; + } } else { rc = tls_set_sw_offload(sk, ctx, 0); - if (ctx->conf == TLS_SW_TX) - conf = TLS_SW_RXTX; - else - conf = TLS_SW_RX; + conf = TLS_SW; } if (rc) goto err_crypto_info; - ctx->conf = conf; + if (tx) + ctx->tx_conf = conf; + else + ctx->rx_conf = conf; update_sk_prot(sk, ctx); if (tx) { ctx->sk_write_space = sk->sk_write_space; @@ -542,7 +558,8 @@ static int tls_hw_prot(struct sock *sk) ctx->hash = sk->sk_prot->hash; ctx->unhash = sk->sk_prot->unhash; ctx->sk_proto_close = sk->sk_prot->close; - ctx->conf = TLS_HW_RECORD; + ctx->rx_conf = TLS_HW_RECORD; + ctx->tx_conf = TLS_HW_RECORD; update_sk_prot(sk, ctx); rc = 1; break; @@ -586,29 +603,40 @@ static int tls_hw_hash(struct sock *sk) return err; } -static void build_protos(struct proto *prot, struct proto *base) +static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], + struct proto *base) { - prot[TLS_BASE] = *base; - prot[TLS_BASE].setsockopt = tls_setsockopt; - prot[TLS_BASE].getsockopt = tls_getsockopt; - prot[TLS_BASE].close = tls_sk_proto_close; - - prot[TLS_SW_TX] = prot[TLS_BASE]; - prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; - prot[TLS_SW_TX].sendpage = tls_sw_sendpage; - - prot[TLS_SW_RX] = prot[TLS_BASE]; - prot[TLS_SW_RX].recvmsg = tls_sw_recvmsg; - prot[TLS_SW_RX].close = tls_sk_proto_close; - - prot[TLS_SW_RXTX] = prot[TLS_SW_TX]; - prot[TLS_SW_RXTX].recvmsg = tls_sw_recvmsg; - prot[TLS_SW_RXTX].close = tls_sk_proto_close; - - prot[TLS_HW_RECORD] = *base; - 
prot[TLS_HW_RECORD].hash = tls_hw_hash; - prot[TLS_HW_RECORD].unhash = tls_hw_unhash; - prot[TLS_HW_RECORD].close = tls_sk_proto_close; + prot[TLS_BASE][TLS_BASE] = *base; + prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; + prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; + prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; + + prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; + prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; + prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage; + + prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; + prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg; + prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close; + + prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE]; + prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg; + prot[TLS_SW][TLS_SW].close = tls_sk_proto_close; + +#ifdef CONFIG_TLS_DEVICE + prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; + prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg; + prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage; + + prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW]; + prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg; + prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage; +#endif + + prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base; + prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_hw_hash; + prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_hw_unhash; + prot[TLS_HW_RECORD][TLS_HW_RECORD].close = tls_sk_proto_close; } static int tls_init(struct sock *sk) @@ -639,7 +667,7 @@ ctx->getsockopt = sk->sk_prot->getsockopt; ctx->sk_proto_close = sk->sk_prot->close; /* Build IPv6 TLS whenever the address of tcpv6_prot changes */ if (ip_ver == TLSV6 && unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { mutex_lock(&tcpv6_prot_mutex); @@ -650,7 +678,8 @@ mutex_unlock(&tcpv6_prot_mutex); } - ctx->conf = TLS_BASE; + ctx->tx_conf = TLS_BASE; + ctx->rx_conf = TLS_BASE; update_sk_prot(sk, ctx); out: return rc; } @@ -688,6 +717,9 @@ static int __init tls_register(void) tls_sw_proto_ops.poll = tls_sw_poll; tls_sw_proto_ops.splice_read = tls_sw_splice_read; +#ifdef CONFIG_TLS_DEVICE + tls_device_init(); +#endif tcp_register_ulp(&tcp_tls_ulp_ops); return 0; @@ -696,6 +728,9 @@ static void __exit tls_unregister(void) { tcp_unregister_ulp(&tcp_tls_ulp_ops); +#ifdef CONFIG_TLS_DEVICE + tls_device_cleanup(); +#endif } module_init(tls_register); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 71e79597f940..5c3909c311f1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -52,7 +52,7 @@ static int tls_do_decryption(struct sock *sk, gfp_t flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct strp_msg *rxm = strp_msg(skb); struct aead_request *aead_req; @@ -122,7 +122,7 @@ out: static void trim_both_sgl(struct sock *sk, int target_size) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); trim_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem, @@ -141,7 +141,7 @@ static void trim_both_sgl(struct sock *sk, int target_size) static int alloc_encrypted_sg(struct sock *sk, int len) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_tx *ctx =
tls_sw_ctx_tx(tls_ctx); int rc = 0; rc = sk_alloc_sg(sk, len, @@ -155,7 +155,7 @@ static int alloc_encrypted_sg(struct sock *sk, int len) static int alloc_plaintext_sg(struct sock *sk, int len) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); int rc = 0; rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0, @@ -181,7 +181,7 @@ static void free_sg(struct sock *sk, struct scatterlist *sg, static void tls_free_both_sg(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); free_sg(sk, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size); @@ -191,7 +191,7 @@ static void tls_free_both_sg(struct sock *sk) } static int tls_do_encryption(struct tls_context *tls_ctx, - struct tls_sw_context *ctx, size_t data_len, + struct tls_sw_context_tx *ctx, size_t data_len, gfp_t flags) { unsigned int req_size = sizeof(struct aead_request) + @@ -227,7 +227,7 @@ static int tls_push_record(struct sock *sk, int flags, unsigned char record_type) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); int rc; sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1); @@ -339,7 +339,7 @@ static int memcopy_from_iter(struct sock *sk, struct iov_iter *from, int bytes) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct scatterlist *sg = ctx->sg_plaintext_data; int copy, i, rc = 0; @@ -367,7 +367,7 @@ out: int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); int ret = 0; int required_size; long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -522,7 +522,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); int ret = 0; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); bool eor; @@ -636,7 +636,7 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags, long timeo, int *err) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct sk_buff *skb; DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -674,7 +674,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + MAX_IV_SIZE]; struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2]; struct scatterlist *sgin = &sgin_arr[0]; @@ -693,8 +693,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, if (!sgout) { nsg = skb_cow_data(skb, 0, &unused) + 1; sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation); - if (!sgout) - sgout = sgin; + sgout = sgin; } sg_init_table(sgin, nsg); @@ -724,7 +723,7 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff 
*skb, unsigned int len) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct strp_msg *rxm = strp_msg(skb); if (len < rxm->full_len) { @@ -750,7 +749,7 @@ int tls_sw_recvmsg(struct sock *sk, int *addr_len) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); unsigned char control; struct strp_msg *rxm; struct sk_buff *skb; @@ -870,7 +869,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, size_t len, unsigned int flags) { struct tls_context *tls_ctx = tls_get_ctx(sock->sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct strp_msg *rxm = NULL; struct sock *sk = sock->sk; struct sk_buff *skb; @@ -923,7 +922,7 @@ unsigned int tls_sw_poll(struct file *file, struct socket *sock, unsigned int ret; struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); /* Grab POLLOUT and POLLHUP from the underlying socket */ ret = ctx->sk_poll(file, sock, wait); @@ -939,7 +938,7 @@ unsigned int tls_sw_poll(struct file *file, struct socket *sock, static int tls_read_size(struct strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); char header[tls_ctx->rx.prepend_size]; struct strp_msg *rxm = strp_msg(skb); size_t cipher_overhead; @@ -988,7 +987,7 @@ read_failure: static void tls_queue(struct strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct strp_msg *rxm; rxm = strp_msg(skb); @@ -1004,18 +1003,28 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb) static void tls_data_ready(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); strp_data_ready(&ctx->strp); } -void tls_sw_free_resources(struct sock *sk) +void tls_sw_free_resources_tx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); if (ctx->aead_send) crypto_free_aead(ctx->aead_send); + tls_free_both_sg(sk); + + kfree(ctx); +} + +void tls_sw_free_resources_rx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + if (ctx->aead_recv) { if (ctx->recv_pkt) { kfree_skb(ctx->recv_pkt); @@ -1031,10 +1040,7 @@ void tls_sw_free_resources(struct sock *sk) lock_sock(sk); } - tls_free_both_sg(sk); - kfree(ctx); - kfree(tls_ctx); } int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) @@ -1042,7 +1048,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; struct tls_crypto_info *crypto_info; struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; - struct tls_sw_context *sw_ctx; + struct tls_sw_context_tx *sw_ctx_tx = NULL; + struct tls_sw_context_rx *sw_ctx_rx = NULL; struct cipher_context *cctx; struct crypto_aead **aead; struct 
strp_callbacks cb; @@ -1055,27 +1062,32 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) goto out; } - if (!ctx->priv_ctx) { - sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL); - if (!sw_ctx) { + if (tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) { rc = -ENOMEM; goto out; } - crypto_init_wait(&sw_ctx->async_wait); + crypto_init_wait(&sw_ctx_tx->async_wait); + ctx->priv_ctx_tx = sw_ctx_tx; } else { - sw_ctx = ctx->priv_ctx; + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) { + rc = -ENOMEM; + goto out; + } + crypto_init_wait(&sw_ctx_rx->async_wait); + ctx->priv_ctx_rx = sw_ctx_rx; } - ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; - if (tx) { crypto_info = &ctx->crypto_send; cctx = &ctx->tx; - aead = &sw_ctx->aead_send; + aead = &sw_ctx_tx->aead_send; } else { crypto_info = &ctx->crypto_recv; cctx = &ctx->rx; - aead = &sw_ctx->aead_recv; + aead = &sw_ctx_rx->aead_recv; } switch (crypto_info->cipher_type) { @@ -1122,22 +1134,24 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } memcpy(cctx->rec_seq, rec_seq, rec_seq_size); - if (tx) { - sg_init_table(sw_ctx->sg_encrypted_data, - ARRAY_SIZE(sw_ctx->sg_encrypted_data)); - sg_init_table(sw_ctx->sg_plaintext_data, - ARRAY_SIZE(sw_ctx->sg_plaintext_data)); - - sg_init_table(sw_ctx->sg_aead_in, 2); - sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space, - sizeof(sw_ctx->aad_space)); - sg_unmark_end(&sw_ctx->sg_aead_in[1]); - sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data); - sg_init_table(sw_ctx->sg_aead_out, 2); - sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space, - sizeof(sw_ctx->aad_space)); - sg_unmark_end(&sw_ctx->sg_aead_out[1]); - sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data); + if (sw_ctx_tx) { + sg_init_table(sw_ctx_tx->sg_encrypted_data, + ARRAY_SIZE(sw_ctx_tx->sg_encrypted_data)); + sg_init_table(sw_ctx_tx->sg_plaintext_data, + ARRAY_SIZE(sw_ctx_tx->sg_plaintext_data)); + + sg_init_table(sw_ctx_tx->sg_aead_in, 2); + sg_set_buf(&sw_ctx_tx->sg_aead_in[0], sw_ctx_tx->aad_space, + sizeof(sw_ctx_tx->aad_space)); + sg_unmark_end(&sw_ctx_tx->sg_aead_in[1]); + sg_chain(sw_ctx_tx->sg_aead_in, 2, + sw_ctx_tx->sg_plaintext_data); + sg_init_table(sw_ctx_tx->sg_aead_out, 2); + sg_set_buf(&sw_ctx_tx->sg_aead_out[0], sw_ctx_tx->aad_space, + sizeof(sw_ctx_tx->aad_space)); + sg_unmark_end(&sw_ctx_tx->sg_aead_out[1]); + sg_chain(sw_ctx_tx->sg_aead_out, 2, + sw_ctx_tx->sg_encrypted_data); } if (!*aead) { @@ -1162,22 +1176,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (rc) goto free_aead; - if (!tx) { + if (sw_ctx_rx) { /* Set up strparser */ memset(&cb, 0, sizeof(cb)); cb.rcv_msg = tls_queue; cb.parse_msg = tls_read_size; - strp_init(&sw_ctx->strp, sk, &cb); + strp_init(&sw_ctx_rx->strp, sk, &cb); write_lock_bh(&sk->sk_callback_lock); - sw_ctx->saved_data_ready = sk->sk_data_ready; + sw_ctx_rx->saved_data_ready = sk->sk_data_ready; sk->sk_data_ready = tls_data_ready; write_unlock_bh(&sk->sk_callback_lock); - sw_ctx->sk_poll = sk->sk_socket->ops->poll; + sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll; - strp_check_rcv(&sw_ctx->strp); + strp_check_rcv(&sw_ctx_rx->strp); } goto out; @@ -1189,11 +1203,16 @@ free_rec_seq: kfree(cctx->rec_seq); cctx->rec_seq = NULL; free_iv: - kfree(ctx->tx.iv); - ctx->tx.iv = NULL; + kfree(cctx->iv); + cctx->iv = NULL; free_priv: - kfree(ctx->priv_ctx); - ctx->priv_ctx = NULL; + if (tx) { + kfree(ctx->priv_ctx_tx); + ctx->priv_ctx_tx = NULL; + } else { 
+ kfree(ctx->priv_ctx_rx); + ctx->priv_ctx_rx = NULL; + } out: return rc; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4d6a6edd4bf6..b853581592fd 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -44,6 +44,7 @@ hostprogs-y += xdp_monitor hostprogs-y += xdp_rxq_info hostprogs-y += syscall_tp hostprogs-y += cpustat +hostprogs-y += xdp_adjust_tail # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o @@ -95,6 +96,7 @@ xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o +xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -112,7 +114,6 @@ always += sock_flags_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o -always += tcbpf2_kern.o always += tc_l2_redirect_kern.o always += lathist_kern.o always += offwaketime_kern.o @@ -148,6 +149,7 @@ always += xdp_rxq_info_kern.o always += xdp2skb_meta_kern.o always += syscall_tp_kern.o always += cpustat_kern.o +always += xdp_adjust_tail_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -193,6 +195,7 @@ HOSTLOADLIBES_xdp_monitor += -lelf HOSTLOADLIBES_xdp_rxq_info += -lelf HOSTLOADLIBES_syscall_tp += -lelf HOSTLOADLIBES_cpustat += -lelf +HOSTLOADLIBES_xdp_adjust_tail += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index bebe4188b4b3..feca497d6afd 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -549,7 +549,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) if (nr_maps < 0) { printf("Error: Failed loading ELF maps (errno:%d):%s\n", nr_maps, strerror(-nr_maps)); - ret = 1; goto done; } if (load_maps(map_data, nr_maps, fixup_map)) @@ -615,7 +614,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) } } - ret = 0; done: close(fd); return ret; diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index 6fc6e193ef1b..33a637507c00 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -9,10 +9,10 @@ * if (value) * (*(u64*)value) += 1; * - * - attaches this program to eth0 raw socket + * - attaches this program to loopback interface "lo" raw socket * * - every second user space reads map[tcp], map[udp], map[icmp] to see - * how many packets of given protocol were seen on eth0 + * how many packets of given protocol were seen on "lo" */ #include <stdio.h> #include <unistd.h> diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh deleted file mode 100755 index c265863ccdf9..000000000000 --- a/samples/bpf/test_tunnel_bpf.sh +++ /dev/null @@ -1,319 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# In Namespace 0 (at_ns0) using native tunnel -# Overlay IP: 10.1.1.100 -# local 192.16.1.100 remote 192.16.1.200 -# veth0 IP: 172.16.1.100, tunnel dev <type>00 - -# Out of Namespace using BPF set/get on lwtunnel -# Overlay IP: 10.1.1.200 -# local 172.16.1.200 remote 172.16.1.100 -# veth1 IP: 172.16.1.200, tunnel dev <type>11 - -function config_device { - ip netns add at_ns0 - ip link add veth0 type veth peer name veth1 - ip link set veth0 netns 
at_ns0 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip link set dev veth1 up mtu 1500 - ip addr add dev veth1 172.16.1.200/24 -} - -function add_gre_tunnel { - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # out of namespace - ip link add dev $DEV type $TYPE key 2 external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -function add_ip6gretap_tunnel { - - # assign ipv6 address - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \ - local ::11 remote ::22 - - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # out of namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip addr add dev $DEV fc80::200/24 - ip link set dev $DEV up -} - -function add_erspan_tunnel { - # in namespace - if [ "$1" == "v1" ]; then - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 \ - erspan_ver 1 erspan 123 - else - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 \ - erspan_ver 2 erspan_dir egress erspan_hwid 3 - fi - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # out of namespace - ip link add dev $DEV type $TYPE external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -function add_ip6erspan_tunnel { - - # assign ipv6 address - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # in namespace - if [ "$1" == "v1" ]; then - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local ::11 remote ::22 \ - erspan_ver 1 erspan 123 - else - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local ::11 remote ::22 \ - erspan_ver 2 erspan_dir egress erspan_hwid 7 - fi - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # out of namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip link set dev $DEV up -} - -function add_vxlan_tunnel { - # Set static ARP entry here because iptables set-mark works - # on L3 packet, as a result not applying to ARP packets, - # causing errors at get_tunnel_{key/opt}. 
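(The *_set_tunnel/*_get_tunnel sections attached by this script live in tcbpf2_kern.o, whose build rule is removed above because the test moved to selftests. For orientation, the "set" side of such a filter follows roughly this pattern; a sketch modeled on the GRE setup above, not code copied from the tree:)

#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"

SEC("gre_set_tunnel")
int _gre_set_tunnel(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};

	key.remote_ipv4 = 0xac100164;	/* 172.16.1.100 */
	key.tunnel_id = 2;		/* matches "key 2" in add_gre_tunnel */
	key.tunnel_ttl = 64;

	/* stamp the tunnel metadata consumed by the "external" gre device */
	if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
				   BPF_F_ZERO_CSUM_TX))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";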
- - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE id 2 dstport 4789 gbp remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00 - ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF - - # out of namespace - ip link add dev $DEV type $TYPE external gbp dstport 4789 - ip link set dev $DEV address 52:54:00:d9:02:00 up - ip addr add dev $DEV 10.1.1.200/24 - arp -s 10.1.1.100 52:54:00:d9:01:00 -} - -function add_geneve_tunnel { - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE id 2 dstport 6081 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # out of namespace - ip link add dev $DEV type $TYPE dstport 6081 external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -function add_ipip_tunnel { - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # out of namespace - ip link add dev $DEV type $TYPE external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -function attach_bpf { - DEV=$1 - SET_TUNNEL=$2 - GET_TUNNEL=$3 - tc qdisc add dev $DEV clsact - tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL - tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL -} - -function test_gre { - TYPE=gretap - DEV_NS=gretap00 - DEV=gretap11 - config_device - add_gre_tunnel - attach_bpf $DEV gre_set_tunnel gre_get_tunnel - ping -c 1 10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_ip6gre { - TYPE=ip6gre - DEV_NS=ip6gre00 - DEV=ip6gre11 - config_device - # reuse the ip6gretap function - add_ip6gretap_tunnel - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel - # underlay - ping6 -c 4 ::11 - # overlay: ipv4 over ipv6 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - ping -c 1 10.1.1.100 - # overlay: ipv6 over ipv6 - ip netns exec at_ns0 ping6 -c 1 fc80::200 - cleanup -} - -function test_ip6gretap { - TYPE=ip6gretap - DEV_NS=ip6gretap00 - DEV=ip6gretap11 - config_device - add_ip6gretap_tunnel - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel - # underlay - ping6 -c 4 ::11 - # overlay: ipv4 over ipv6 - ip netns exec at_ns0 ping -i .2 -c 1 10.1.1.200 - ping -c 1 10.1.1.100 - # overlay: ipv6 over ipv6 - ip netns exec at_ns0 ping6 -c 1 fc80::200 - cleanup -} - -function test_erspan { - TYPE=erspan - DEV_NS=erspan00 - DEV=erspan11 - config_device - add_erspan_tunnel $1 - attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel - ping -c 1 10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_ip6erspan { - TYPE=ip6erspan - DEV_NS=ip6erspan00 - DEV=ip6erspan11 - config_device - add_ip6erspan_tunnel $1 - attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel - ping6 -c 3 ::11 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_vxlan { - TYPE=vxlan - DEV_NS=vxlan00 - DEV=vxlan11 - config_device - add_vxlan_tunnel - attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel - ping -c 1 10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_geneve { - TYPE=geneve - DEV_NS=geneve00 - DEV=geneve11 - config_device - add_geneve_tunnel - 
attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel - ping -c 1 10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_ipip { - TYPE=ipip - DEV_NS=ipip00 - DEV=ipip11 - config_device - tcpdump -nei veth1 & - cat /sys/kernel/debug/tracing/trace_pipe & - add_ipip_tunnel - ethtool -K veth1 gso off gro off rx off tx off - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel - ping -c 1 10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null - sleep 0.2 - iperf -c 10.1.1.100 -n 5k -p 5200 - cleanup -} - -function cleanup { - set +ex - pkill iperf - ip netns delete at_ns0 - ip link del veth1 - ip link del ipip11 - ip link del gretap11 - ip link del ip6gre11 - ip link del ip6gretap11 - ip link del vxlan11 - ip link del geneve11 - ip link del erspan11 - ip link del ip6erspan11 - pkill tcpdump - pkill cat - set -ex -} - -trap cleanup 0 2 3 6 9 -cleanup -echo "Testing GRE tunnel..." -test_gre -echo "Testing IP6GRE tunnel..." -test_ip6gre -echo "Testing IP6GRETAP tunnel..." -test_ip6gretap -echo "Testing ERSPAN tunnel..." -test_erspan v1 -test_erspan v2 -echo "Testing IP6ERSPAN tunnel..." -test_ip6erspan v1 -test_ip6erspan v2 -echo "Testing VXLAN tunnel..." -test_vxlan -echo "Testing GENEVE tunnel..." -test_geneve -echo "Testing IPIP tunnel..." -test_ipip -echo "*** PASS ***" diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c new file mode 100644 index 000000000000..411fdb21f8bc --- /dev/null +++ b/samples/bpf/xdp_adjust_tail_kern.c @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program shows how to use bpf_xdp_adjust_tail() by + * generating an ICMPv4 "packet too big" reply (more precisely, a v4 + * "destination unreachable, fragmentation needed and DF set" message) + * when receiving packets bigger than 600 bytes.
+ */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/icmp.h> +#include "bpf_helpers.h" + +#define DEFAULT_TTL 64 +#define MAX_PCKT_SIZE 600 +#define ICMP_TOOBIG_SIZE 98 +#define ICMP_TOOBIG_PAYLOAD_SIZE 92 + +struct bpf_map_def SEC("maps") icmpcnt = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +static __always_inline void count_icmp(void) +{ + u64 key = 0; + u64 *icmp_count; + + icmp_count = bpf_map_lookup_elem(&icmpcnt, &key); + if (icmp_count) + *icmp_count += 1; +} + +static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth) +{ + struct ethhdr *eth; + + eth = data; + memcpy(eth->h_source, orig_eth->h_dest, ETH_ALEN); + memcpy(eth->h_dest, orig_eth->h_source, ETH_ALEN); + eth->h_proto = orig_eth->h_proto; +} + +static __always_inline __u16 csum_fold_helper(__u32 csum) +{ + return ~((csum & 0xffff) + (csum >> 16)); +} + +static __always_inline void ipv4_csum(void *data_start, int data_size, + __u32 *csum) +{ + *csum = bpf_csum_diff(0, 0, data_start, data_size, *csum); + *csum = csum_fold_helper(*csum); +} + +static __always_inline int send_icmp4_too_big(struct xdp_md *xdp) +{ + int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr); + + if (bpf_xdp_adjust_head(xdp, 0 - headroom)) + return XDP_DROP; + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + + if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end) + return XDP_DROP; + + struct iphdr *iph, *orig_iph; + struct icmphdr *icmp_hdr; + struct ethhdr *orig_eth; + __u32 csum = 0; + __u64 off = 0; + + orig_eth = data + headroom; + swap_mac(data, orig_eth); + off += sizeof(struct ethhdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + off += sizeof(struct icmphdr); + orig_iph = data + off; + icmp_hdr->type = ICMP_DEST_UNREACH; + icmp_hdr->code = ICMP_FRAG_NEEDED; + icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr)); + icmp_hdr->checksum = 0; + ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum); + icmp_hdr->checksum = csum; + iph->ttl = DEFAULT_TTL; + iph->daddr = orig_iph->saddr; + iph->saddr = orig_iph->daddr; + iph->version = 4; + iph->ihl = 5; + iph->protocol = IPPROTO_ICMP; + iph->tos = 0; + iph->tot_len = htons( + ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr)); + iph->check = 0; + csum = 0; + ipv4_csum(iph, sizeof(struct iphdr), &csum); + iph->check = csum; + count_icmp(); + return XDP_TX; +} + + +static __always_inline int handle_ipv4(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + int pckt_size = data_end - data; + int offset; + + if (pckt_size > MAX_PCKT_SIZE) { + offset = pckt_size - ICMP_TOOBIG_SIZE; + if (bpf_xdp_adjust_tail(xdp, 0 - offset)) + return XDP_PASS; + return send_icmp4_too_big(xdp); + } + return XDP_PASS; +} + +SEC("xdp_icmp") +int _xdp_icmp(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u16 h_proto; + + if (eth + 1 > data_end) + return XDP_DROP; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_IP)) + return handle_ipv4(xdp); + else + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c new file mode 100644 
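(Size sanity check for xdp_adjust_tail_kern.c above: the original frame is first trimmed to ICMP_TOOBIG_SIZE = 98 bytes, and bpf_xdp_adjust_head() then grows it at the front by headroom = sizeof(struct iphdr) + sizeof(struct icmphdr) = 28 bytes, which is exactly what the bounds check against ICMP_TOOBIG_SIZE + headroom = 126 bytes verifies. The new IP total length is 98 + 28 - 14 = 112 bytes after subtracting the Ethernet header; of that, 20 bytes are the new IP header and the remaining 92 bytes are the checksummed ICMP portion, ICMP_TOOBIG_PAYLOAD_SIZE: an 8-byte ICMP header plus the 84-byte (98 - 14) truncated original datagram.)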
index 000000000000..f621a541b574 --- /dev/null +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <arpa/inet.h> +#include <netinet/ether.h> +#include <unistd.h> +#include <time.h> +#include "bpf_load.h" +#include "libbpf.h" +#include "bpf_util.h" + +#define STATS_INTERVAL_S 2U + +static int ifindex = -1; +static __u32 xdp_flags; + +static void int_exit(int sig) +{ + if (ifindex > -1) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + exit(0); +} + +/* simple "icmp packet too big sent" counter + */ +static void poll_stats(unsigned int kill_after_s) +{ + time_t started_at = time(NULL); + __u64 value = 0; + int key = 0; + + + while (!kill_after_s || time(NULL) - started_at <= kill_after_s) { + sleep(STATS_INTERVAL_S); + + assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0); + + printf("icmp \"packet too big\" sent: %10llu pkts\n", value); + } +} + +static void usage(const char *cmd) +{ + printf("Start an XDP prog which sends ICMP \"packet too big\"\n" + "messages if an ingress packet is bigger than MAX_SIZE bytes\n"); + printf("Usage: %s [...]\n", cmd); + printf(" -i <ifindex> Interface Index\n"); + printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n"); + printf(" -S use skb-mode\n"); + printf(" -N enforce native mode\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + unsigned char opt_flags[256] = {}; + unsigned int kill_after_s = 0; + const char *optstr = "i:T:SNh"; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256]; + int opt; + int i; + + + for (i = 0; i < strlen(optstr); i++) + if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z') + opt_flags[(unsigned char)optstr[i]] = 1; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + + switch (opt) { + case 'i': + ifindex = atoi(optarg); + break; + case 'T': + kill_after_s = atoi(optarg); + break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(argv[0]); + return 1; + } + opt_flags[opt] = 0; + } + + for (i = 0; i < strlen(optstr); i++) { + if (opt_flags[(unsigned int)optstr[i]]) { + fprintf(stderr, "Missing argument -%c\n", optstr[i]); + usage(argv[0]); + return 1; + } + } + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); + return 1; + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + poll_stats(kill_after_s); + + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + + return 0; +} diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index eec14520d513..894bc64c2cac 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -330,7 +330,7 @@ static void
stats_print(struct stats_record *stats_rec, pps = calc_pps_u64(r, p, t); if (pps > 0) printf(fmt1, "Exception", i, - 0.0, pps, err2str(rec_i)); + 0.0, pps, action2str(rec_i)); } pps = calc_pps_u64(&rec->total, &prev->total, t); if (pps > 0) diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile deleted file mode 100644 index fa53f4d77834..000000000000 --- a/samples/sockmap/Makefile +++ /dev/null @@ -1,78 +0,0 @@ -# List of programs to build -hostprogs-y := sockmap - -# Libbpf dependencies -LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o - -HOSTCFLAGS += -I$(objtree)/usr/include -HOSTCFLAGS += -I$(srctree)/tools/lib/ -HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ -HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include -HOSTCFLAGS += -I$(srctree)/tools/perf - -sockmap-objs := ../bpf/bpf_load.o $(LIBBPF) sockmap_user.o - -# Tell kbuild to always build the programs -always := $(hostprogs-y) -always += sockmap_kern.o - -HOSTLOADLIBES_sockmap += -lelf -lpthread - -# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: -# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang -LLC ?= llc -CLANG ?= clang - -# Trick to allow make to be run from this directory -all: - $(MAKE) -C ../../ $(CURDIR)/ - -clean: - $(MAKE) -C ../../ M=$(CURDIR) clean - @rm -f *~ - -$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c - $(call if_changed_dep,cc_s_c) - -$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE - $(call filechk,offsets,__SYSCALL_NRS_H__) - -clean-files += syscall_nrs.h - -FORCE: - - -# Verify LLVM compiler tools are available and bpf target is supported by llc -.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC) - -verify_cmds: $(CLANG) $(LLC) - @for TOOL in $^ ; do \ - if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \ - echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\ - exit 1; \ - else true; fi; \ - done - -verify_target_bpf: verify_cmds - @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \ - echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\ - echo " NOTICE: LLVM version >= 3.7.1 required" ;\ - exit 2; \ - else true; fi - -$(src)/*.c: verify_target_bpf - -# asm/sysreg.h - inline assembly used by it is incompatible with llvm. -# But, there is no easy way to fix it, so just exclude it since it is -# useless for BPF samples. -# -# -target bpf option required with SK_MSG programs, this is to ensure -# reading 'void *' data types for data and data_end are __u64 reads. 
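(For context on the -target bpf remark above: SK_MSG programs access their payload through msg->data and msg->data_end, and those fields must be read as 64-bit values for the verifier's bounds tracking to work. A minimal sketch of the access pattern the comment is protecting; illustrative, not taken from this tree:)

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

SEC("sk_msg")
int msg_bounds_check(struct sk_msg_md *msg)
{
	void *data = (void *)(long)msg->data;
	void *data_end = (void *)(long)msg->data_end;

	/* prove to the verifier that one byte is readable */
	if (data + 1 > data_end)
		return SK_DROP;

	return *(unsigned char *)data ? SK_PASS : SK_DROP;
}

char _license[] SEC("license") = "GPL";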
-$(obj)/%.o: $(src)/%.c - $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ - -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ - -Wno-compare-distinct-pointer-types \ - -Wno-gnu-variable-sized-type-not-at-end \ - -Wno-address-of-packed-member -Wno-tautological-compare \ - -Wno-unknown-warning-option -O2 -target bpf \ - -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh deleted file mode 100755 index ace75f070eb8..000000000000 --- a/samples/sockmap/sockmap_test.sh +++ /dev/null @@ -1,488 +0,0 @@ -#Test a bunch of positive cases to verify basic functionality -for prog in "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do -for t in "sendmsg" "sendpage"; do -for r in 1 10 100; do - for i in 1 10 100; do - for l in 1 10 100; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 - done - done -done -done -done - -#Test max iov -t="sendmsg" -r=1 -i=1024 -l=1 -prog="--txmsg" - -TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" -echo $TEST -$TEST -sleep 2 -prog="--txmsg_redir" -TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" -echo $TEST -$TEST - -# Test max iov with 1k send - -t="sendmsg" -r=1 -i=1024 -l=1024 -prog="--txmsg" - -TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" -echo $TEST -$TEST -sleep 2 -prog="--txmsg_redir" -TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" -echo $TEST -$TEST -sleep 2 - -# Test apply with 1B -r=1 -i=1024 -l=1024 -prog="--txmsg_apply 1" - -for t in "sendmsg" "sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test apply with larger value than send -r=1 -i=8 -l=1024 -prog="--txmsg_apply 2048" - -for t in "sendmsg" "sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test apply with apply that never reaches limit -r=1024 -i=1 -l=1 -prog="--txmsg_apply 2048" - -for t in "sendmsg" "sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test apply and redirect with 1B -r=1 -i=1024 -l=1024 -prog="--txmsg_redir --txmsg_apply 1" - -for t in "sendmsg" "sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress" - -for t in "sendmsg" "sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb" - -for t in "sendmsg" "sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - - -# Test apply and redirect with larger value than send -r=1 -i=8 -l=1024 -prog="--txmsg_redir --txmsg_apply 2048" - -for t in "sendmsg" "sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress" - -for t in "sendmsg" "sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb" - -for t in "sendmsg" 
"sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - - -# Test apply and redirect with apply that never reaches limit -r=1024 -i=1 -l=1 -prog="--txmsg_apply 2048" - -for t in "sendmsg" "sendpage"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test cork with 1B not really useful but test it anyways -r=1 -i=1024 -l=1024 -prog="--txmsg_cork 1" - -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test cork with a more reasonable 100B -r=1 -i=1000 -l=1000 -prog="--txmsg_cork 100" - -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test cork with larger value than send -r=1 -i=8 -l=1024 -prog="--txmsg_cork 2048" - -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test cork with cork that never reaches limit -r=1024 -i=1 -l=1 -prog="--txmsg_cork 2048" - -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -r=1 -i=1024 -l=1024 -prog="--txmsg_redir --txmsg_cork 1" - -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test cork with a more reasonable 100B -r=1 -i=1000 -l=1000 -prog="--txmsg_redir --txmsg_cork 100" - -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test cork with larger value than send -r=1 -i=8 -l=1024 -prog="--txmsg_redir --txmsg_cork 2048" - -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test cork with cork that never reaches limit -r=1024 -i=1 -l=1 -prog="--txmsg_cork 2048" - -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - - -# mix and match cork and apply not really useful but valid programs - -# Test apply < cork -r=100 -i=1 -l=5 -prog="--txmsg_apply 10 --txmsg_cork 100" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Try again with larger sizes so we hit overflow case -r=100 -i=1000 -l=2048 -prog="--txmsg_apply 4096 --txmsg_cork 8096" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test apply > cork -r=100 -i=1 -l=5 -prog="--txmsg_apply 100 --txmsg_cork 10" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Again with larger sizes so we hit overflow cases -r=100 -i=1000 -l=2048 -prog="--txmsg_apply 8096 --txmsg_cork 4096" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - - -# Test apply = cork -r=100 -i=1 -l=5 -prog="--txmsg_apply 10 --txmsg_cork 10" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST 
- sleep 2 -done - -r=100 -i=1000 -l=2048 -prog="--txmsg_apply 4096 --txmsg_cork 4096" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test apply < cork -r=100 -i=1 -l=5 -prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Try again with larger sizes so we hit overflow case -r=100 -i=1000 -l=2048 -prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Test apply > cork -r=100 -i=1 -l=5 -prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Again with larger sizes so we hit overflow cases -r=100 -i=1000 -l=2048 -prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - - -# Test apply = cork -r=100 -i=1 -l=5 -prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -r=100 -i=1000 -l=2048 -prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096" -for t in "sendpage" "sendmsg"; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" - echo $TEST - $TEST - sleep 2 -done - -# Tests for bpf_msg_pull_data() -for i in `seq 99 100 1600`; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ - --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600" - echo $TEST - $TEST - sleep 2 -done - -for i in `seq 199 100 1600`; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ - --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600" - echo $TEST - $TEST - sleep 2 -done - -TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ - --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600" -echo $TEST -$TEST -sleep 2 - -TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ - --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600" -echo $TEST -$TEST -sleep 2 - -TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ - --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600" -echo $TEST -$TEST -sleep 2 - -TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ - --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600" -echo $TEST -$TEST -sleep 2 - -TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ - --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602" -echo $TEST -$TEST -sleep 2 - -# Run through gamut again with start and end -for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do -for t in "sendmsg" "sendpage"; do -for r in 1 10 100; do - for i in 1 10 100; do - for l in 1 10 100; do - TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2" - echo $TEST - $TEST - sleep 2 - done - done -done -done -done - -# Some specific tests to cover specific code paths -./sockmap --cgroup /mnt/cgroup2/ -t sendpage \ - -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3 
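(The --txmsg_start/--txmsg_end cases above exercise bpf_msg_pull_data(), which makes the requested byte range contiguous and readable before the verdict is returned. A sketch of the kernel-side program shape, mirroring the "--txmsg_start 1 --txmsg_end 2" runs; illustrative, not the actual test program:)

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

SEC("sk_msg")
int msg_pull_range(struct sk_msg_md *msg)
{
	/* linearize bytes [1, 2) so they can be inspected directly */
	if (bpf_msg_pull_data(msg, 1, 2, 0))
		return SK_DROP;

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";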
-./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \ - -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3 -./sockmap --cgroup /mnt/cgroup2/ -t sendpage \ - -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5 -./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \ - -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5 diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py new file mode 100755 index 000000000000..30ba0fee36e4 --- /dev/null +++ b/scripts/bpf_helpers_doc.py @@ -0,0 +1,421 @@ +#!/usr/bin/python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2018 Netronome Systems, Inc. + +# In case user attempts to run with Python 2. +from __future__ import print_function + +import argparse +import re +import sys, os + +class NoHelperFound(BaseException): + pass + +class ParsingError(BaseException): + def __init__(self, line='<line not provided>', reader=None): + if reader: + BaseException.__init__(self, + 'Error at file offset %d, parsing line: %s' % + (reader.tell(), line)) + else: + BaseException.__init__(self, 'Error parsing line: %s' % line) + +class Helper(object): + """ + An object representing the description of an eBPF helper function. + @proto: function prototype of the helper function + @desc: textual description of the helper function + @ret: description of the return value of the helper function + """ + def __init__(self, proto='', desc='', ret=''): + self.proto = proto + self.desc = desc + self.ret = ret + + def proto_break_down(self): + """ + Break down the helper function prototype into smaller chunks: return + type, name, and distinct arguments. + """ + arg_re = re.compile('^((const )?(struct )?(\w+|...))( (\**)(\w+))?$') + res = {} + proto_re = re.compile('^(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$') + + capture = proto_re.match(self.proto) + res['ret_type'] = capture.group(1) + res['ret_star'] = capture.group(2) + res['name'] = capture.group(3) + res['args'] = [] + + args = capture.group(4).split(', ') + for a in args: + capture = arg_re.match(a) + res['args'].append({ + 'type' : capture.group(1), + 'star' : capture.group(6), + 'name' : capture.group(7) + }) + + return res + +class HeaderParser(object): + """ + An object used to parse a file in order to extract the documentation of a + list of eBPF helper functions. All the helpers that can be retrieved are + stored as Helper objects in the self.helpers list. + @filename: name of file to parse, usually include/uapi/linux/bpf.h in the + kernel tree + """ + def __init__(self, filename): + self.reader = open(filename, 'r') + self.line = '' + self.helpers = [] + + def parse_helper(self): + proto = self.parse_proto() + desc = self.parse_desc() + ret = self.parse_ret() + return Helper(proto=proto, desc=desc, ret=ret) + + def parse_proto(self): + # Argument can be of shape: + # - "void" + # - "type name" + # - "type *name" + # - Same as above, with "const" and/or "struct" in front of type + # - "..." (undefined number of arguments, for bpf_trace_printk()) + # There is at least one term ("void"), and at most five arguments. + p = re.compile('^ \* ((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$') + capture = p.match(self.line) + if not capture: + raise NoHelperFound + self.line = self.reader.readline() + return capture.group(1) + + def parse_desc(self): + p = re.compile('^ \* \tDescription$') + capture = p.match(self.line) + if not capture: + # Helper can have empty description and we might be parsing another + # attribute: return but do not consume.
+ return '' + # Description can be several lines, some of them possibly empty, and it + # stops when another subsection title is met. + desc = '' + while True: + self.line = self.reader.readline() + if self.line == ' *\n': + desc += '\n' + else: + p = re.compile('^ \* \t\t(.*)') + capture = p.match(self.line) + if capture: + desc += capture.group(1) + '\n' + else: + break + return desc + + def parse_ret(self): + p = re.compile('^ \* \tReturn$') + capture = p.match(self.line) + if not capture: + # Helper can have empty retval and we might be parsing another + # attribute: return but do not consume. + return '' + # Return value description can be several lines, some of them possibly + # empty, and it stops when another subsection title is met. + ret = '' + while True: + self.line = self.reader.readline() + if self.line == ' *\n': + ret += '\n' + else: + p = re.compile('^ \* \t\t(.*)') + capture = p.match(self.line) + if capture: + ret += capture.group(1) + '\n' + else: + break + return ret + + def run(self): + # Advance to start of helper function descriptions. + offset = self.reader.read().find('* Start of BPF helper function descriptions:') + if offset == -1: + raise Exception('Could not find start of eBPF helper descriptions list') + self.reader.seek(offset) + self.reader.readline() + self.reader.readline() + self.line = self.reader.readline() + + while True: + try: + helper = self.parse_helper() + self.helpers.append(helper) + except NoHelperFound: + break + + self.reader.close() + print('Parsed description of %d helper function(s)' % len(self.helpers), + file=sys.stderr) + +############################################################################### + +class Printer(object): + """ + A generic class for printers. Printers should be created with an array of + Helper objects, and implement a way to print them in the desired fashion. + @helpers: array of Helper objects to print to standard output + """ + def __init__(self, helpers): + self.helpers = helpers + + def print_header(self): + pass + + def print_footer(self): + pass + + def print_one(self, helper): + pass + + def print_all(self): + self.print_header() + for helper in self.helpers: + self.print_one(helper) + self.print_footer() + +class PrinterRST(Printer): + """ + A printer for dumping collected information about helpers as a ReStructured + Text page compatible with the rst2man program, which can be used to + generate a manual page for the helpers. + @helpers: array of Helper objects to print to standard output + """ + def print_header(self): + header = '''\ +.. Copyright (C) All BPF authors and contributors from 2014 to present. +.. See git log include/uapi/linux/bpf.h in kernel tree for details. +.. +.. %%%LICENSE_START(VERBATIM) +.. Permission is granted to make and distribute verbatim copies of this +.. manual provided the copyright notice and this permission notice are +.. preserved on all copies. +.. +.. Permission is granted to copy and distribute modified versions of this +.. manual under the conditions for verbatim copying, provided that the +.. entire resulting derived work is distributed under the terms of a +.. permission notice identical to this one. +.. +.. Since the Linux kernel and libraries are constantly changing, this +.. manual page may be incorrect or out-of-date. The author(s) assume no +.. responsibility for errors or omissions, or for damages resulting from +.. the use of the information contained herein. The author(s) may not +.. have taken the same level of care in the production of this manual, +.. 
which is licensed free of charge, as they might when working
+.. professionally.
+..
+.. Formatted or processed versions of this manual, if unaccompanied by
+.. the source, must acknowledge the copyright and authors of this work.
+.. %%%LICENSE_END
+..
+.. Please do not edit this file. It was generated from the documentation
+.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
+.. (helpers description), and from scripts/bpf_helpers_doc.py in the same
+.. repository (header and footer).
+
+===========
+BPF-HELPERS
+===========
+-------------------------------------------------------------------------------
+list of eBPF helper functions
+-------------------------------------------------------------------------------
+
+:Manual section: 7
+
+DESCRIPTION
+===========
+
+The extended Berkeley Packet Filter (eBPF) subsystem consists of programs
+written in a pseudo-assembly language, then attached to one of the several
+kernel hooks and run in reaction to specific events. This framework differs
+from the older, "classic" BPF (or "cBPF") in several aspects, one of them being
+the ability to call special functions (or "helpers") from within a program.
+These functions are restricted to a white-list of helpers defined in the
+kernel.
+
+These helpers are used by eBPF programs to interact with the system, or with
+the context in which they work. For instance, they can be used to print
+debugging messages, to get the time since the system was booted, to interact
+with eBPF maps, or to manipulate network packets. Since there are several eBPF
+program types, and they do not run in the same context, each program type
+can only call a subset of those helpers.
+
+Due to eBPF conventions, a helper cannot have more than five arguments.
+
+Internally, eBPF programs call directly into the compiled helper functions
+without requiring any foreign-function interface. As a result, calling helpers
+introduces no overhead, thus offering excellent performance.
+
+This document is an attempt to list and document the helpers available to eBPF
+developers. They are sorted in chronological order (the oldest helpers in the
+kernel at the top).
+
+HELPERS
+=======
+'''
+        print(header)
+
+    def print_footer(self):
+        footer = '''
+EXAMPLES
+========
+
+Example usage for most of the eBPF helpers listed in this manual page is
+available within the Linux kernel sources, at the following locations:
+
+* *samples/bpf/*
+* *tools/testing/selftests/bpf/*
+
+LICENSE
+=======
+
+eBPF programs can have an associated license, passed along with the bytecode
+instructions to the kernel when the programs are loaded. The format for that
+string is identical to the one in use for kernel modules (Dual licenses, such
+as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
+programs that are compatible with the GNU General Public License (GPL).
+
+In order to use such helpers, the eBPF program must be loaded with the correct
+license string passed (via **attr**) to the **bpf**\ () system call, and this
+generally translates into the C source code of the program containing a line
+similar to the following:
+
+::
+
+	char ____license[] __attribute__((section("license"), used)) = "GPL";
+
+IMPLEMENTATION
+==============
+
+This manual page is an effort to document the existing eBPF helper functions.
+But as of this writing, the BPF sub-system is under heavy development. New eBPF
+program or map types are added, along with new helper functions.
Some helpers +are occasionally made available for additional program types. So in spite of +the efforts of the community, this page might not be up-to-date. If you want to +check by yourself what helper functions exist in your kernel, or what types of +programs they can support, here are some files among the kernel tree that you +may be interested in: + +* *include/uapi/linux/bpf.h* is the main BPF header. It contains the full list + of all helper functions, as well as many other BPF definitions including most + of the flags, structs or constants used by the helpers. +* *net/core/filter.c* contains the definition of most network-related helper + functions, and the list of program types from which they can be used. +* *kernel/trace/bpf_trace.c* is the equivalent for most tracing program-related + helpers. +* *kernel/bpf/verifier.c* contains the functions used to check that valid types + of eBPF maps are used with a given helper function. +* *kernel/bpf/* directory contains other files in which additional helpers are + defined (for cgroups, sockmaps, etc.). + +Compatibility between helper functions and program types can generally be found +in the files where helper functions are defined. Look for the **struct +bpf_func_proto** objects and for functions returning them: these functions +contain a list of helpers that a given program type can call. Note that the +**default:** label of the **switch ... case** used to filter helpers can call +other functions, themselves allowing access to additional helpers. The +requirement for GPL license is also in those **struct bpf_func_proto**. + +Compatibility between helper functions and map types can be found in the +**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*. + +Helper functions that invalidate the checks on **data** and **data_end** +pointers for network processing are listed in function +**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*. + +SEE ALSO +======== + +**bpf**\ (2), +**cgroups**\ (7), +**ip**\ (8), +**perf_event_open**\ (2), +**sendmsg**\ (2), +**socket**\ (7), +**tc-bpf**\ (8)''' + print(footer) + + def print_proto(self, helper): + """ + Format function protocol with bold and italics markers. This makes RST + file less readable, but gives nice results in the manual page. + """ + proto = helper.proto_break_down() + + print('**%s %s%s(' % (proto['ret_type'], + proto['ret_star'].replace('*', '\\*'), + proto['name']), + end='') + + comma = '' + for a in proto['args']: + one_arg = '{}{}'.format(comma, a['type']) + if a['name']: + if a['star']: + one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*')) + else: + one_arg += '** ' + one_arg += '*{}*\\ **'.format(a['name']) + comma = ', ' + print(one_arg, end='') + + print(')**') + + def print_one(self, helper): + self.print_proto(helper) + + if (helper.desc): + print('\tDescription') + # Do not strip all newline characters: formatted code at the end of + # a section must be followed by a blank line. 
+ for line in re.sub('\n$', '', helper.desc, count=1).split('\n'): + print('{}{}'.format('\t\t' if line else '', line)) + + if (helper.ret): + print('\tReturn') + for line in helper.ret.rstrip().split('\n'): + print('{}{}'.format('\t\t' if line else '', line)) + + print('') + +############################################################################### + +# If script is launched from scripts/ from kernel tree and can access +# ../include/uapi/linux/bpf.h, use it as a default name for the file to parse, +# otherwise the --filename argument will be required from the command line. +script = os.path.abspath(sys.argv[0]) +linuxRoot = os.path.dirname(os.path.dirname(script)) +bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h') + +argParser = argparse.ArgumentParser(description=""" +Parse eBPF header file and generate documentation for eBPF helper functions. +The RST-formatted output produced can be turned into a manual page with the +rst2man utility. +""") +if (os.path.isfile(bpfh)): + argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h', + default=bpfh) +else: + argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h') +args = argParser.parse_args() + +# Parse file. +headerParser = HeaderParser(args.filename) +headerParser.run() + +# Print formatted output to standard output. +printer = PrinterRST(headerParser.helpers) +printer.print_all() diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 0e4e923235b6..d004f6382dab 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -26,7 +26,8 @@ MAP COMMANDS | **bpftool** **cgroup help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** } +| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | +| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -63,7 +64,13 @@ DESCRIPTION **egress** egress path of the inet socket (since 4.10); **sock_create** opening of an inet socket (since 4.10); **sock_ops** various socket operations (since 4.12); - **device** device access (since 4.15). + **device** device access (since 4.15); + **bind4** call to bind(2) for an inet4 socket (since 4.17); + **bind6** call to bind(2) for an inet6 socket (since 4.17); + **post_bind4** return from bind(2) for an inet4 socket (since 4.17); + **post_bind6** return from bind(2) for an inet6 socket (since 4.17); + **connect4** call to connect(2) for an inet4 socket (since 4.17); + **connect6** call to connect(2) for an inet6 socket (since 4.17). 
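As a quick sanity check of the prototype parsing done by the script added above, the following standalone sketch (an illustration under the stated assumptions, not part of the patch itself) runs the same two regular expressions that Helper.proto_break_down() uses, on one of the prototypes documented later in this patch:

::

    import re

    # Same expressions as in scripts/bpf_helpers_doc.py above.
    proto_re = re.compile('^(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
    arg_re = re.compile('^((const )?(struct )?(\w+|...))( (\**)(\w+))?$')

    proto = 'int bpf_map_delete_elem(struct bpf_map *map, const void *key)'
    m = proto_re.match(proto)
    print(m.group(1), m.group(3))    # -> int bpf_map_delete_elem
    for arg in m.group(4).split(', '):
        a = arg_re.match(arg)
        print(a.group(1), a.group(6), a.group(7))
    # -> struct bpf_map * map
    # -> const void * key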
	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
		  Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 457e868bd32f..5f512b14bff9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -23,10 +23,10 @@ MAP COMMANDS
 | **bpftool** **map { show | list }** [*MAP*]
 | **bpftool** **map dump** *MAP*
-| **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*]
-| **bpftool** **map lookup** *MAP* **key** *BYTES*
-| **bpftool** **map getnext** *MAP* [**key** *BYTES*]
-| **bpftool** **map delete** *MAP* **key** *BYTES*
+| **bpftool** **map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
+| **bpftool** **map lookup** *MAP* **key** [**hex**] *BYTES*
+| **bpftool** **map getnext** *MAP* [**key** [**hex**] *BYTES*]
+| **bpftool** **map delete** *MAP* **key** [**hex**] *BYTES*
 | **bpftool** **map pin** *MAP* *FILE*
 | **bpftool** **map help**
 |
@@ -48,20 +48,26 @@ DESCRIPTION
 	**bpftool map dump** *MAP*
 		  Dump all entries in a given *MAP*.

-	**bpftool map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*]
+	**bpftool map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
 		  Update map entry for a given *KEY*.

 		  *UPDATE_FLAGS* can be one of: **any** update existing entry
 		  or add if doesn't exist; **exist** update only if entry already
 		  exists; **noexist** update only if entry doesn't exist.

-	**bpftool map lookup** *MAP* **key** *BYTES*
+		  If the **hex** keyword is provided in front of the bytes
+		  sequence, the bytes are parsed as hexadecimal values, even if
+		  no "0x" prefix is added. If the keyword is not provided, then
+		  the bytes are parsed as decimal values, unless a "0x" prefix
+		  (for hexadecimal) or a "0" prefix (for octal) is provided.
+
+	**bpftool map lookup** *MAP* **key** [**hex**] *BYTES*
 		  Lookup **key** in the map.

-	**bpftool map getnext** *MAP* [**key** *BYTES*]
+	**bpftool map getnext** *MAP* [**key** [**hex**] *BYTES*]
 		  Get next key. If *key* is not specified, get first key.

-	**bpftool map delete** *MAP* **key** *BYTES*
+	**bpftool map delete** *MAP* **key** [**hex**] *BYTES*
 		  Remove entry from the map.
**bpftool map pin** *MAP* *FILE* @@ -98,7 +104,12 @@ EXAMPLES 10: hash name some_map flags 0x0 key 4B value 8B max_entries 2048 memlock 167936B -**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04** +The following three commands are equivalent: + +| +| **# bpftool map update id 10 key hex 20 c4 b7 00 value hex 0f ff ff ab 01 02 03 4c** +| **# bpftool map update id 10 key 0x20 0xc4 0xb7 0x00 value 0x0f 0xff 0xff 0xab 0x01 0x02 0x03 0x4c** +| **# bpftool map update id 10 key 32 196 183 0 value 15 255 255 171 1 2 3 76** **# bpftool map lookup id 10 key 0 1 2 3** diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 67ca6c69376c..43d34a5c3ec5 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -95,7 +95,7 @@ EXAMPLES **# bpftool prog show** :: - 10: xdp name some_prog tag 005a3d2123620c8b + 10: xdp name some_prog tag 005a3d2123620c8b gpl loaded_at Sep 29/20:11 uid 0 xlated 528B jited 370B memlock 4096B map_ids 10 @@ -108,6 +108,7 @@ EXAMPLES "id": 10, "type": "xdp", "tag": "005a3d2123620c8b", + "gpl_compatible": true, "loaded_at": "Sep 29/20:11", "uid": 0, "bytes_xlated": 528, diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 490811b45fa7..852d84a98acd 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -147,7 +147,7 @@ _bpftool() # Deal with simplest keywords case $prev in - help|key|opcodes|visual) + help|hex|opcodes|visual) return 0 ;; tag) @@ -283,7 +283,7 @@ _bpftool() return 0 ;; key) - return 0 + COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) ;; *) _bpftool_once_attr 'key' @@ -302,7 +302,7 @@ _bpftool() return 0 ;; key) - return 0 + COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) ;; value) # We can have bytes, or references to a prog or a @@ -321,6 +321,8 @@ _bpftool() return 0 ;; *) + COMPREPLY+=( $( compgen -W 'hex' \ + -- "$cur" ) ) return 0 ;; esac @@ -372,7 +374,8 @@ _bpftool() ;; attach|detach) local ATTACH_TYPES='ingress egress sock_create sock_ops \ - device' + device bind4 bind6 post_bind4 post_bind6 connect4 \ + connect6' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag' case $prev in @@ -380,7 +383,8 @@ _bpftool() _filedir return 0 ;; - ingress|egress|sock_create|sock_ops|device) + ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ + post_bind4|post_bind6|connect4|connect6) COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index cae32a61cb18..1351bd6b5aa7 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -16,8 +16,11 @@ #define HELP_SPEC_ATTACH_FLAGS \ "ATTACH_FLAGS := { multi | override }" -#define HELP_SPEC_ATTACH_TYPES \ - "ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }" +#define HELP_SPEC_ATTACH_TYPES \ + " ATTACH_TYPE := { ingress | egress | sock_create |\n" \ + " sock_ops | device | bind4 | bind6 |\n" \ + " post_bind4 | post_bind6 | connect4 |\n" \ + " connect6 }" static const char * const attach_type_strings[] = { [BPF_CGROUP_INET_INGRESS] = "ingress", @@ -25,6 +28,12 @@ static const char * const attach_type_strings[] = { [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", [BPF_CGROUP_SOCK_OPS] = "sock_ops", [BPF_CGROUP_DEVICE] = "device", + [BPF_CGROUP_INET4_BIND] = "bind4", + [BPF_CGROUP_INET6_BIND] = "bind6", + [BPF_CGROUP_INET4_CONNECT] = "connect4", + 
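The byte-parsing rule described above can be summarized in a few lines of Python (a sketch of the documented behaviour, not the bpftool implementation, which is in C). Python's int(..., 0) follows the same conventions as strtoul(..., 0) for the prefixes used in these examples:

::

    def parse_bytes(tokens):
        # With the "hex" keyword, every token is base-16 even without "0x";
        # otherwise "0x" selects hexadecimal, a leading "0" octal, else
        # decimal. (Python rejects bare leading-zero octals such as "010"
        # that strtoul would accept; the examples below avoid that case.)
        base = 0
        if tokens and tokens[0] == 'hex':
            base = 16
            tokens = tokens[1:]
        return [int(t, base) for t in tokens]

    # The three equivalent key encodings from the example above:
    assert parse_bytes(['hex', '20', 'c4', 'b7', '00']) == [0x20, 0xc4, 0xb7, 0]
    assert parse_bytes(['0x20', '0xc4', '0xb7', '0x00']) == [0x20, 0xc4, 0xb7, 0]
    assert parse_bytes(['32', '196', '183', '0']) == [0x20, 0xc4, 0xb7, 0]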
[BPF_CGROUP_INET6_CONNECT] = "connect6", + [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", + [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", [__MAX_BPF_ATTACH_TYPE] = NULL, }; @@ -282,7 +291,7 @@ static int do_help(int argc, char **argv) " %s %s detach CGROUP ATTACH_TYPE PROG\n" " %s %s help\n" "\n" - " " HELP_SPEC_ATTACH_TYPES "\n" + HELP_SPEC_ATTACH_TYPES "\n" " " HELP_SPEC_ATTACH_FLAGS "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS "\n" diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index f509c86faede..a6cdb640a0d7 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -283,11 +283,16 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, static char **parse_bytes(char **argv, const char *name, unsigned char *val, unsigned int n) { - unsigned int i = 0; + unsigned int i = 0, base = 0; char *endptr; + if (is_prefix(*argv, "hex")) { + base = 16; + argv++; + } + while (i < n && argv[i]) { - val[i] = strtoul(argv[i], &endptr, 0); + val[i] = strtoul(argv[i], &endptr, base); if (*endptr) { p_err("error parsing byte: %s", argv[i]); return NULL; @@ -869,10 +874,10 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s %s { show | list } [MAP]\n" " %s %s dump MAP\n" - " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n" - " %s %s lookup MAP key BYTES\n" - " %s %s getnext MAP [key BYTES]\n" - " %s %s delete MAP key BYTES\n" + " %s %s update MAP key [hex] BYTES value [hex] VALUE [UPDATE_FLAGS]\n" + " %s %s lookup MAP key [hex] BYTES\n" + " %s %s getnext MAP [key [hex] BYTES]\n" + " %s %s delete MAP key [hex] BYTES\n" " %s %s pin MAP FILE\n" " %s %s help\n" "\n" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f7a810897eac..e71a0a11afde 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -68,6 +68,9 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", [BPF_PROG_TYPE_SK_SKB] = "sk_skb", [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", }; static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) @@ -232,6 +235,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); + jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible); + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); if (info->load_time) { @@ -292,6 +297,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); + printf("%s", info->gpl_compatible ? 
" gpl" : ""); printf("\n"); if (info->load_time) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c5ec89732a8d..da77a9388947 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -95,6 +95,7 @@ enum bpf_cmd { BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, }; enum bpf_map_type { @@ -279,6 +280,9 @@ union bpf_attr { */ char map_name[BPF_OBJ_NAME_LEN]; __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_id; /* BTF type_id of the key */ + __u32 btf_value_id; /* BTF type_id of the value */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -363,398 +367,1406 @@ union bpf_attr { __u64 name; __u32 prog_fd; } raw_tracepoint; + + struct { /* anonymous struct for BPF_BTF_LOAD */ + __aligned_u64 btf; + __aligned_u64 btf_log_buf; + __u32 btf_size; + __u32 btf_log_size; + __u32 btf_log_level; + }; } __attribute__((aligned(8))); -/* BPF helper function descriptions: - * - * void *bpf_map_lookup_elem(&map, &key) - * Return: Map value or NULL - * - * int bpf_map_update_elem(&map, &key, &value, flags) - * Return: 0 on success or negative error - * - * int bpf_map_delete_elem(&map, &key) - * Return: 0 on success or negative error - * - * int bpf_probe_read(void *dst, int size, void *src) - * Return: 0 on success or negative error +/* The description below is an attempt at providing documentation to eBPF + * developers about the multiple available eBPF helper functions. It can be + * parsed and used to produce a manual page. The workflow is the following, + * and requires the rst2man utility: + * + * $ ./scripts/bpf_helpers_doc.py \ + * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst + * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 + * $ man /tmp/bpf-helpers.7 + * + * Note that in order to produce this external documentation, some RST + * formatting is used in the descriptions to get "bold" and "italics" in + * manual pages. Also note that the few trailing white spaces are + * intentional, removing them would break paragraphs for rst2man. + * + * Start of BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) + * Description + * Perform a lookup in *map* for an entry associated to *key*. + * Return + * Map value associated to *key*, or **NULL** if no entry was + * found. + * + * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * Description + * Add or update the value of the entry associated to *key* in + * *map* with *value*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * Flag value **BPF_NOEXIST** cannot be used for maps of types + * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all + * elements always exist), the helper would return an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * Description + * Delete entry with *key* from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read(void *dst, u32 size, const void *src) + * Description + * For tracing programs, safely attempt to read *size* bytes from + * address *src* and store the data in *dst*. 
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  *
  * u64 bpf_ktime_get_ns(void)
- *	Return: current ktime
- *
- * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
- *	Return: length of buffer written or negative error
- *
- * u32 bpf_prandom_u32(void)
- *	Return: random value
- *
- * u32 bpf_raw_smp_processor_id(void)
- *	Return: SMP processor ID
- *
- * int bpf_skb_store_bytes(skb, offset, from, len, flags)
- *	store bytes into packet
- *	@skb: pointer to skb
- *	@offset: offset within packet from skb->mac_header
- *	@from: pointer where to copy bytes from
- *	@len: number of bytes to store into packet
- *	@flags: bit 0 - if true, recompute skb->csum
- *		other bits - reserved
- *	Return: 0 on success or negative error
- *
- * int bpf_l3_csum_replace(skb, offset, from, to, flags)
- *	recompute IP checksum
- *	@skb: pointer to skb
- *	@offset: offset within packet where IP checksum is located
- *	@from: old value of header field
- *	@to: new value of header field
- *	@flags: bits 0-3 - size of header field
- *		other bits - reserved
- *	Return: 0 on success or negative error
- *
- * int bpf_l4_csum_replace(skb, offset, from, to, flags)
- *	recompute TCP/UDP checksum
- *	@skb: pointer to skb
- *	@offset: offset within packet where TCP/UDP checksum is located
- *	@from: old value of header field
- *	@to: new value of header field
- *	@flags: bits 0-3 - size of header field
- *		bit 4 - is pseudo header
- *		other bits - reserved
- *	Return: 0 on success or negative error
- *
- * int bpf_tail_call(ctx, prog_array_map, index)
- *	jump into another BPF program
- *	@ctx: context pointer passed to next program
- *	@prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- *	@index: 32-bit index inside array that selects specific program to run
- *	Return: 0 on success or negative error
- *
- * int bpf_clone_redirect(skb, ifindex, flags)
- *	redirect to another netdev
- *	@skb: pointer to skb
- *	@ifindex: ifindex of the net device
- *	@flags: bit 0 - if set, redirect to ingress instead of egress
- *		other bits - reserved
- *	Return: 0 on success or negative error
+ *	Description
+ *		Return the time elapsed since system boot, in nanoseconds.
+ *	Return
+ *		Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ *	Description
+ *		This helper is a "printk()-like" facility for debugging. It
+ *		prints a message defined by format *fmt* (of size *fmt_size*)
+ *		to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ *		available. It can take up to three additional **u64**
+ *		arguments (as for eBPF helpers, the total number of arguments
+ *		is limited to five).
+ *
+ *		Each time the helper is called, it appends a line to the trace.
+ *		The format of the trace is customizable, and the exact output
+ *		one will get depends on the options set in
+ *		*\/sys/kernel/debug/tracing/trace_options* (see also the
+ *		*README* file under the same directory). However, it usually
+ *		defaults to something like:
+ *
+ *		::
+ *
+ *			telnet-470   [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ *		In the above:
+ *
+ *			* ``telnet`` is the name of the current task.
+ *			* ``470`` is the PID of the current task.
+ *			* ``001`` is the CPU number on which the task is
+ *			  running.
+ *			* In ``.N..``, each character refers to a set of
+ *			  options (whether irqs are enabled, scheduling
+ *			  options, whether hard/softirqs are running, level of
+ *			  preempt_disabled respectively).
**N** means that
+ *			  **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ *			  are set.
+ *			* ``419421.045894`` is a timestamp.
+ *			* ``0x00000001`` is a fake value used by BPF for the
+ *			  instruction pointer register.
+ *			* ``<formatted msg>`` is the message formatted with
+ *			  *fmt*.
+ *
+ *		The conversion specifiers supported by *fmt* are similar, but
+ *		more limited than for printk(). They are **%d**, **%i**,
+ *		**%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ *		**%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ *		of field, padding with zeroes, etc.) is available, and the
+ *		helper will return **-EINVAL** (but print nothing) if it
+ *		encounters an unknown specifier.
+ *
+ *		Also, note that **bpf_trace_printk**\ () is slow, and should
+ *		only be used for debugging purposes. For this reason, a notice
+ *		block (spanning several lines) is printed to kernel logs and
+ *		states that the helper should not be used "for production use"
+ *		the first time this helper is used (or more precisely, when
+ *		**trace_printk**\ () buffers are allocated). For passing values
+ *		to user space, perf events should be preferred.
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ *	Description
+ *		Get a pseudo-random number.
+ *
+ *		From a security point of view, this helper uses its own
+ *		pseudo-random internal state, and cannot be used to infer the
+ *		seed of other random functions in the kernel. However, it is
+ *		essential to note that the generator used by the helper is not
+ *		cryptographically secure.
+ *	Return
+ *		A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ *	Description
+ *		Get the SMP (symmetric multiprocessing) processor id. Note that
+ *		all programs run with preemption disabled, which means that the
+ *		SMP processor id is stable throughout the execution of the
+ *		program.
+ *	Return
+ *		The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ *	Description
+ *		Store *len* bytes from address *from* into the packet
+ *		associated to *skb*, at *offset*. *flags* are a combination of
+ *		**BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ *		checksum for the packet after storing the bytes) and
+ *		**BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ *		**->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *		packet buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ *	Description
+ *		Recompute the layer 3 (e.g. IP) checksum for the packet
+ *		associated to *skb*. Computation is incremental, so the helper
+ *		must know the former value of the header field that was
+ *		modified (*from*), the new value of this field (*to*), and the
+ *		number of bytes (2 or 4) for this field, stored in *size*.
+ *		Alternatively, it is possible to store the difference between
+ *		the previous and the new values of the header field in *to*, by
+ *		setting *from* and *size* to 0. For both methods, *offset*
+ *		indicates the location of the IP checksum within the packet.
+ *
+ *		This helper works in combination with **bpf_csum_diff**\ (),
+ *		which does not update the checksum in-place, but offers more
+ *		flexibility and can handle sizes larger than 2 or 4 for the
+ *		checksum to update.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *		packet buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ *	Description
+ *		Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ *		packet associated to *skb*. Computation is incremental, so the
+ *		helper must know the former value of the header field that was
+ *		modified (*from*), the new value of this field (*to*), and the
+ *		number of bytes (2 or 4) for this field, stored on the lowest
+ *		four bits of *flags*. Alternatively, it is possible to store
+ *		the difference between the previous and the new values of the
+ *		header field in *to*, by setting *from* and the four lowest
+ *		bits of *flags* to 0. For both methods, *offset* indicates the
+ *		location of the TCP/UDP checksum within the packet. In
+ *		addition to the size of the field, actual flags can be added
+ *		(bitwise OR) to *flags*. With **BPF_F_MARK_MANGLED_0**, a null
+ *		checksum is left untouched (unless **BPF_F_MARK_ENFORCE** is
+ *		added as well), and for updates resulting in a null checksum
+ *		the value is set to **CSUM_MANGLED_0** instead. Flag
+ *		**BPF_F_PSEUDO_HDR** indicates the checksum is to be computed
+ *		against a pseudo-header.
+ *
+ *		This helper works in combination with **bpf_csum_diff**\ (),
+ *		which does not update the checksum in-place, but offers more
+ *		flexibility and can handle sizes larger than 2 or 4 for the
+ *		checksum to update.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *		packet buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ *	Description
+ *		This special helper is used to trigger a "tail call", or in
+ *		other words, to jump into another eBPF program. The same stack
+ *		frame is used (but values on stack and in registers for the
+ *		caller are not accessible to the callee). This mechanism allows
+ *		for program chaining, either for raising the maximum number of
+ *		available eBPF instructions, or to execute given programs in
+ *		conditional blocks. For security reasons, there is an upper
+ *		limit to the number of successive tail calls that can be
+ *		performed.
+ *
+ *		Upon call of this helper, the program attempts to jump into a
+ *		program referenced at index *index* in *prog_array_map*, a
+ *		special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ *		*ctx*, a pointer to the context.
+ *
+ *		If the call succeeds, the kernel immediately runs the first
+ *		instruction of the new program. This is not a function call,
+ *		and it never returns to the previous program. If the call
+ *		fails, then the helper has no effect, and the caller continues
+ *		to run its subsequent instructions.
A call can fail if the
+ *		destination program for the jump does not exist (i.e. *index*
+ *		is greater than or equal to the number of entries in
+ *		*prog_array_map*), or if the maximum number of tail calls has
+ *		been reached for this chain of programs. This limit is defined
+ *		in the kernel by the macro **MAX_TAIL_CALL_CNT** (not
+ *		accessible to user space), which is currently set to 32.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ *	Description
+ *		Clone and redirect the packet associated to *skb* to another
+ *		net device of index *ifindex*. Both ingress and egress
+ *		interfaces can be used for redirection. The **BPF_F_INGRESS**
+ *		value in *flags* is used to make the distinction (ingress path
+ *		is selected if the flag is present, egress path otherwise).
+ *		This is the only flag supported for now.
+ *
+ *		In comparison with **bpf_redirect**\ () helper,
+ *		**bpf_clone_redirect**\ () has the associated cost of
+ *		duplicating the packet buffer, but this can be executed outside
+ *		of the eBPF program. Conversely, **bpf_redirect**\ () is more
+ *		efficient, but it is handled through an action code where the
+ *		redirection happens only after the eBPF program has returned.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *		packet buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
- *	Return: current->tgid << 32 | current->pid
+ *	Return
+ *		A 64-bit integer containing the current tgid and pid, and
+ *		created as such:
+ *		*current_task*\ **->tgid << 32 \|**
+ *		*current_task*\ **->pid**.
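The packing documented for **bpf_get_current_pid_tgid**\ () above is easy to get wrong when post-processing values in user space. A minimal sketch (illustrative only, not kernel code) of how a consumer would split the 64-bit value back into its two halves:

::

    def unpack_pid_tgid(v):
        # Upper 32 bits: tgid (what user space calls the PID);
        # lower 32 bits: pid (what user space calls the TID).
        return v >> 32, v & 0xffffffff

    assert unpack_pid_tgid((1234 << 32) | 5678) == (1234, 5678)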
* * u64 bpf_get_current_uid_gid(void) - * Return: current_gid << 32 | current_uid - * - * int bpf_get_current_comm(char *buf, int size_of_buf) - * stores current->comm into buf - * Return: 0 on success or negative error - * - * u32 bpf_get_cgroup_classid(skb) - * retrieve a proc's classid - * @skb: pointer to skb - * Return: classid if != 0 - * - * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) - * Return: 0 on success or negative error - * - * int bpf_skb_vlan_pop(skb) - * Return: 0 on success or negative error - * - * int bpf_skb_get_tunnel_key(skb, key, size, flags) - * int bpf_skb_set_tunnel_key(skb, key, size, flags) - * retrieve or populate tunnel metadata - * @skb: pointer to skb - * @key: pointer to 'struct bpf_tunnel_key' - * @size: size of 'struct bpf_tunnel_key' - * @flags: room for future extensions - * Return: 0 on success or negative error - * - * u64 bpf_perf_event_read(map, flags) - * read perf event counter value - * @map: pointer to perf_event_array map - * @flags: index of event in the map or bitmask flags - * Return: value of perf event counter read or error code - * - * int bpf_redirect(ifindex, flags) - * redirect to another netdev - * @ifindex: ifindex of the net device - * @flags: - * cls_bpf: - * bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * xdp_bpf: - * all bits - reserved - * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error - * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error - * int bpf_redirect_map(map, key, flags) - * redirect to endpoint in map - * @map: pointer to dev map - * @key: index in map to lookup - * @flags: -- - * Return: XDP_REDIRECT on success or XDP_ABORT on error - * - * u32 bpf_get_route_realm(skb) - * retrieve a dst's tclassid - * @skb: pointer to skb - * Return: realm if != 0 - * - * int bpf_perf_event_output(ctx, map, flags, data, size) - * output perf raw sample - * @ctx: struct pt_regs* - * @map: pointer to perf_event_array map - * @flags: index of event in the map or bitmask flags - * @data: data on stack to be output as raw data - * @size: size of data - * Return: 0 on success or negative error - * - * int bpf_get_stackid(ctx, map, flags) - * walk user or kernel stack and return id - * @ctx: struct pt_regs* - * @map: pointer to stack_trace map - * @flags: bits 0-7 - numer of stack frames to skip - * bit 8 - collect user stack instead of kernel - * bit 9 - compare stacks by hash only - * bit 10 - if two different stacks hash into the same stackid - * discard old - * other bits - reserved - * Return: >= 0 stackid on success or negative error - * - * s64 bpf_csum_diff(from, from_size, to, to_size, seed) - * calculate csum diff - * @from: raw from buffer - * @from_size: length of from buffer - * @to: raw to buffer - * @to_size: length of to buffer - * @seed: optional seed - * Return: csum result or negative error code - * - * int bpf_skb_get_tunnel_opt(skb, opt, size) - * retrieve tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: option size - * - * int bpf_skb_set_tunnel_opt(skb, opt, size) - * populate tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: 0 on success or negative error - * - * int bpf_skb_change_proto(skb, proto, flags) - * Change protocol of the skb. Currently supported is v4 -> v6, - * v6 -> v4 transitions. The helper will also resize the skb. 
eBPF
- *	program is expected to fill the new headers via skb_store_bytes
- *	and lX_csum_replace.
- *	@skb: pointer to skb
- *	@proto: new skb->protocol type
- *	@flags: reserved
- *	Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- *	Change packet type of skb.
- *	@skb: pointer to skb
- *	@type: new skb->pkt_type type
- *	Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- *	Check cgroup2 membership of skb
- *	@skb: pointer to skb
- *	@map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *	@index: index of the cgroup in the bpf_map
- *	Return:
- *	  == 0 skb failed the cgroup2 descendant test
- *	  == 1 skb succeeded the cgroup2 descendant test
- *	   < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- *	Retrieve and possibly recalculate skb->hash.
- *	@skb: pointer to skb
- *	Return: hash
+ *	Return
+ *		A 64-bit integer containing the current GID and UID, and
+ *		created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ *	Description
+ *		Copy the **comm** attribute of the current task into *buf* of
+ *		size *size_of_buf*. The **comm** attribute contains the name of
+ *		the executable (excluding the path) for the current task. The
+ *		*size_of_buf* must be strictly positive. On success, the
+ *		helper makes sure that the *buf* is NUL-terminated. On failure,
+ *		it is filled with zeroes.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ *	Description
+ *		Retrieve the classid for the current task, i.e. for the net_cls
+ *		cgroup to which *skb* belongs.
+ *
+ *		This helper can be used on TC egress path, but not on ingress.
+ *
+ *		The net_cls cgroup provides an interface to tag network packets
+ *		based on a user-provided identifier for all traffic coming from
+ *		the tasks belonging to the related cgroup. See also the related
+ *		kernel documentation, available from the Linux sources in file
+ *		*Documentation/cgroup-v1/net_cls.txt*.
+ *
+ *		The Linux kernel has two versions for cgroups: there are
+ *		cgroups v1 and cgroups v2. Both are available to users, who can
+ *		use a mixture of them, but note that the net_cls cgroup is for
+ *		cgroup v1 only. This makes it incompatible with BPF programs
+ *		run on cgroups, which is a cgroup-v2-only feature (a socket can
+ *		only hold data for one version of cgroups at a time).
+ *
+ *		This helper is only available if the kernel was compiled with
+ *		the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ *		"**y**" or to "**m**".
+ *	Return
+ *		The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ *	Description
+ *		Push a *vlan_tci* (VLAN tag control information) of protocol
+ *		*vlan_proto* to the packet associated to *skb*, then update
+ *		the checksum. Note that if *vlan_proto* is different from
+ *		**ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ *		be **ETH_P_8021Q**.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *		packet buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
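For the *vlan_tci* argument of **bpf_skb_vlan_push**\ () just described, the 16-bit tag control information follows the standard IEEE 802.1Q layout (an assumption taken from the 802.1Q specification, not stated in this patch): a 3-bit priority code point, a 1-bit drop eligible indicator, and a 12-bit VLAN identifier. A minimal sketch:

::

    def make_vlan_tci(pcp, dei, vid):
        # PCP in bits 15-13, DEI in bit 12, VLAN ID in bits 11-0
        # (IEEE 802.1Q TCI layout; illustrative helper, not kernel code).
        assert 0 <= pcp < 8 and dei in (0, 1) and 0 <= vid < 4096
        return (pcp << 13) | (dei << 12) | vid

    assert make_vlan_tci(pcp=3, dei=0, vid=100) == 0x6064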
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ *	Description
+ *		Pop a VLAN header from the packet associated to *skb*.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *		packet buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ *	Description
+ *		Get tunnel metadata. This helper takes a pointer *key* to an
+ *		empty **struct bpf_tunnel_key** of *size*, that will be
+ *		filled with tunnel metadata for the packet associated to *skb*.
+ *		The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ *		indicates that the tunnel is based on IPv6 protocol instead of
+ *		IPv4.
+ *
+ *		The **struct bpf_tunnel_key** is an object that generalizes the
+ *		principal parameters used by various tunneling protocols into a
+ *		single struct. This way, it can be used to easily make a
+ *		decision based on the contents of the encapsulation header,
+ *		"summarized" in this struct. In particular, it holds the IP
+ *		address of the remote end (IPv4 or IPv6, depending on the case)
+ *		in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ *		this struct exposes the *key*\ **->tunnel_id**, which is
+ *		generally mapped to a VNI (Virtual Network Identifier), making
+ *		it programmable together with the **bpf_skb_set_tunnel_key**\
+ *		() helper.
+ *
+ *		Let's imagine that the following code is part of a program
+ *		attached to the TC ingress interface, on one end of a GRE
+ *		tunnel, and is supposed to filter out all messages coming from
+ *		remote ends with IPv4 address other than 10.0.0.1:
+ *
+ *		::
+ *
+ *			int ret;
+ *			struct bpf_tunnel_key key = {};
+ *
+ *			ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ *			if (ret < 0)
+ *				return TC_ACT_SHOT;	// drop packet
+ *
+ *			if (key.remote_ipv4 != 0x0a000001)
+ *				return TC_ACT_SHOT;	// drop packet
+ *
+ *			return TC_ACT_OK;		// accept packet
+ *
+ *		This interface can also be used with all encapsulation devices
+ *		that can operate in "collect metadata" mode: instead of having
+ *		one network device per specific configuration, the "collect
+ *		metadata" mode only requires a single device where the
+ *		configuration can be extracted from this helper.
+ *
+ *		This can be used together with various tunnels such as VXLAN,
+ *		Geneve, GRE or IP in IP (IPIP).
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ *	Description
+ *		Populate tunnel metadata for packet associated to *skb.* The
+ *		tunnel metadata is set to the contents of *key*, of *size*. The
+ *		*flags* can be set to a combination of the following values:
+ *
+ *		**BPF_F_TUNINFO_IPV6**
+ *			Indicate that the tunnel is based on IPv6 protocol
+ *			instead of IPv4.
+ *		**BPF_F_ZERO_CSUM_TX**
+ *			For IPv4 packets, add a flag to tunnel metadata
+ *			indicating that checksum computation should be skipped
+ *			and checksum set to zeroes.
+ *		**BPF_F_DONT_FRAGMENT**
+ *			Add a flag to tunnel metadata indicating that the
+ *			packet should not be fragmented.
+ *		**BPF_F_SEQ_NUMBER**
+ *			Add a flag to tunnel metadata indicating that a
+ *			sequence number should be added to tunnel header before
+ *			sending the packet.
This flag was added for GRE
+ *			encapsulation, but might be used with other protocols
+ *			as well in the future.
+ *
+ *		Here is a typical usage on the transmit path:
+ *
+ *		::
+ *
+ *			struct bpf_tunnel_key key;
+ *			     populate key ...
+ *			bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ *			bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ *		See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ *		helper for additional information.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ *	Description
+ *		Read the value of a perf event counter. This helper relies on a
+ *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ *		the perf event counter is selected when *map* is updated with
+ *		perf event file descriptors. The *map* is an array whose size
+ *		is the number of available CPUs, and each cell contains a value
+ *		relative to one CPU. The value to retrieve is indicated by
+ *		*flags*, that contains the index of the CPU to look up, masked
+ *		with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ *		**BPF_F_CURRENT_CPU** to indicate that the value for the
+ *		current CPU should be retrieved.
+ *
+ *		Note that before Linux 4.13, only hardware perf events can be
+ *		retrieved.
+ *
+ *		Also, be aware that the newer helper
+ *		**bpf_perf_event_read_value**\ () is recommended over
+ *		**bpf_perf_event_read**\ () in general. The latter has some ABI
+ *		quirks where error and counter value are used as a return code
+ *		(which is wrong to do since ranges may overlap). This issue is
+ *		fixed with **bpf_perf_event_read_value**\ (), which at the same
+ *		time provides more features over the **bpf_perf_event_read**\
+ *		() interface. Please refer to the description of
+ *		**bpf_perf_event_read_value**\ () for details.
+ *	Return
+ *		The value of the perf event counter read from the map, or a
+ *		negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ *	Description
+ *		Redirect the packet to another net device of index *ifindex*.
+ *		This helper is somewhat similar to **bpf_clone_redirect**\
+ *		(), except that the packet is not cloned, which provides
+ *		increased performance.
+ *
+ *		Except for XDP, both ingress and egress interfaces can be used
+ *		for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ *		to make the distinction (ingress path is selected if the flag
+ *		is present, egress path otherwise). Currently, XDP only
+ *		supports redirection to the egress interface, and accepts no
+ *		flag at all.
+ *
+ *		The same effect can be attained with the more generic
+ *		**bpf_redirect_map**\ (), which requires specific maps to be
+ *		used but offers better performance.
+ *	Return
+ *		For XDP, the helper returns **XDP_REDIRECT** on success or
+ *		**XDP_ABORTED** on error. For other program types, the values
+ *		are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ *		error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ *	Description
+ *		Retrieve the realm of the route, that is to say the
+ *		**tclassid** field of the destination for the *skb*. The
+ *		identifier retrieved is a user-provided tag, similar to the
+ *		one used with the net_cls cgroup (see description for
+ *		**bpf_get_cgroup_classid**\ () helper), but here this tag is
+ *		held by a route (a destination entry), not by a task.
+ *
+ *		Retrieving this identifier works with the clsact TC egress hook
+ *		(see also **tc-bpf(8)**), or alternatively on conventional
+ *		classful egress qdiscs, but not on TC ingress path. In case of
+ *		clsact TC egress hook, this has the advantage that, internally,
+ *		the destination entry has not been dropped yet in the transmit
+ *		path. Therefore, the destination entry does not need to be
+ *		artificially held via **netif_keep_dst**\ () for a classful
+ *		qdisc until the *skb* is freed.
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		the **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ *	Return
+ *		The realm of the route for the packet associated to *skb*, or 0
+ *		if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *	Description
+ *		Write raw *data* blob into a special BPF perf event held by
+ *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *		event must have the following attributes: **PERF_SAMPLE_RAW**
+ *		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *		The *flags* are used to indicate the index in *map* at which
+ *		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *		to indicate that the index of the current CPU core should be
+ *		used.
+ *
+ *		The value to write, of *size*, is passed through eBPF stack and
+ *		pointed by *data*.
+ *
+ *		The context of the program, *ctx*, also needs to be passed to
+ *		the helper.
+ *
+ *		In user space, a program willing to read the values needs to
+ *		call **perf_event_open**\ () on the perf event (either for
+ *		one or for all CPUs) and to store the file descriptor into the
+ *		*map*. This must be done before the eBPF program can send data
+ *		into it. An example is available in file
+ *		*samples/bpf/trace_output_user.c* in the Linux kernel source
+ *		tree (the eBPF program counterpart is in
+ *		*samples/bpf/trace_output_kern.c*).
+ *
+ *		**bpf_perf_event_output**\ () achieves better performance
+ *		than **bpf_trace_printk**\ () for sharing data with user
+ *		space, and is much better suited for streaming data from eBPF
+ *		programs.
+ *
+ *		Note that this helper is not restricted to tracing use cases
+ *		and can be used with programs attached to TC or XDP as well,
+ *		where it allows for passing data to user space listeners. Data
+ *		can be:
+ *
+ *		* Only custom structs,
+ *		* Only the packet payload, or
+ *		* A combination of both.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ *	Description
+ *		This helper was provided as an easy way to load data from a
+ *		packet. It can be used to load *len* bytes from *offset* from
+ *		the packet associated to *skb*, into the buffer pointed by
+ *		*to*.
+ *
+ *		Since Linux 4.7, usage of this helper has mostly been replaced
+ *		by "direct packet access", enabling packet data to be
+ *		manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ *		pointing respectively to the first byte of packet data and to
+ *		the byte after the last byte of packet data. However, it
+ *		remains useful if one wishes to read large quantities of data
+ *		at once from a packet into the eBPF stack.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
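The *flags* convention shared by **bpf_perf_event_read**\ () and **bpf_perf_event_output**\ () above can be made concrete with a short sketch. The constant values below match those defined in include/uapi/linux/bpf.h at the time of writing, but treat them as an assumption rather than ABI documentation:

::

    BPF_F_INDEX_MASK = 0xffffffff         # low 32 bits select the CPU index
    BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK  # all-ones index means "current CPU"

    def decode_event_index(flags):
        idx = flags & BPF_F_INDEX_MASK
        return 'current CPU' if idx == BPF_F_CURRENT_CPU else idx

    assert decode_event_index(3) == 3
    assert decode_event_index(BPF_F_CURRENT_CPU) == 'current CPU'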
+ *
+ * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ *	Description
+ *		Walk a user or a kernel stack and return its id. To achieve
+ *		this, the helper needs *ctx*, which is a pointer to the context
+ *		on which the tracing program is executed, and a pointer to a
+ *		*map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		a combination of the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_FAST_STACK_CMP**
+ *			Compare stacks by hash only.
+ *		**BPF_F_REUSE_STACKID**
+ *			If two different stacks hash into the same *stackid*,
+ *			discard the old one.
+ *
+ *		The stack id retrieved is a 32-bit integer handle which
+ *		can be further combined with other data (including other stack
+ *		ids) and used as a key into maps. This can be useful for
+ *		generating a variety of graphs (such as flame graphs or off-cpu
+ *		graphs).
+ *
+ *		For walking a stack, this helper is an improvement over
+ *		**bpf_probe_read**\ (), which can be used with unrolled loops
+ *		but is not efficient and consumes a lot of eBPF instructions.
+ *		Instead, **bpf_get_stackid**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** kernel and user frames. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *		::
+ *
+ *			# sysctl kernel.perf_event_max_stack=<new value>
+ *
+ *	Return
+ *		The positive or null stack id on success, or a negative error
+ *		in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ *	Description
+ *		Compute a checksum difference, from the raw buffer pointed by
+ *		*from*, of length *from_size* (that must be a multiple of 4),
+ *		towards the raw buffer pointed by *to*, of size *to_size*
+ *		(same remark). An optional *seed* can be added to the value
+ *		(this can be cascaded; the seed may come from a previous call
+ *		to the helper).
+ *
+ *		This is flexible enough to be used in several ways:
+ *
+ *		* With *from_size* == 0, *to_size* > 0 and *seed* set to
+ *		  checksum, it can be used when pushing new data.
+ *		* With *from_size* > 0, *to_size* == 0 and *seed* set to
+ *		  checksum, it can be used when removing data from a packet.
+ *		* With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ *		  can be used to compute a diff. Note that *from_size* and
+ *		  *to_size* do not need to be equal.
+ *
+ *		This helper can be used in combination with
+ *		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ *		which one can feed in the difference computed with
+ *		**bpf_csum_diff**\ ().
+ *	Return
+ *		The checksum result, or a negative error code in case of
+ *		failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ *	Description
+ *		Retrieve tunnel options metadata for the packet associated to
+ *		*skb*, and store the raw tunnel option data to the buffer *opt*
+ *		of *size*.
+ *
+ *		This helper can be used with encapsulation devices that can
+ *		operate in "collect metadata" mode (please refer to the related
+ *		note in the description of **bpf_skb_get_tunnel_key**\ () for
+ *		more details).
A particular example where this can be used is
+ *		in combination with the Geneve encapsulation protocol, where it
+ *		allows for pushing (with **bpf_skb_set_tunnel_opt**\ () helper)
+ *		and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ *		the eBPF program. This allows for full customization of these
+ *		headers.
+ *	Return
+ *		The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ *	Description
+ *		Set tunnel options metadata for the packet associated to *skb*
+ *		to the option data contained in the raw buffer *opt* of *size*.
+ *
+ *		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ *		helper for additional information.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ *	Description
+ *		Change the protocol of the *skb* to *proto*. Currently
+ *		supported are transitions from IPv4 to IPv6, and from IPv6 to
+ *		IPv4. The helper takes care of the groundwork for the
+ *		transition, including resizing the socket buffer. The eBPF
+ *		program is expected to fill the new headers, if any, via
+ *		**bpf_skb_store_bytes**\ () and to recompute the checksums with
+ *		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ *		(). The main use case for this helper is to perform NAT64
+ *		operations out of an eBPF program.
+ *
+ *		Internally, the GSO type is marked as dodgy so that headers are
+ *		checked and segments are recalculated by the GSO/GRO engine.
+ *		The size for GSO target is adapted as well.
+ *
+ *		All values for *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ *		packet buffer. Therefore, at load time, all checks on pointers
+ *		previously done by the verifier are invalidated and must be
+ *		performed again, if the helper is used in combination with
+ *		direct packet access.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ *	Description
+ *		Change the packet type for the packet associated to *skb*. This
+ *		comes down to setting *skb*\ **->pkt_type** to *type*, except
+ *		the eBPF program does not have write access to *skb*\
+ *		**->pkt_type** besides this helper. Using a helper here allows
+ *		for graceful handling of errors.
+ *
+ *		The major use case is to change incoming *skb*s to
+ *		**PACKET_HOST** in a programmatic way instead of having to
+ *		recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ *		example.
+ *
+ *		Note that *type* only allows certain values. At this time, they
+ *		are:
+ *
+ *		**PACKET_HOST**
+ *			Packet is for us.
+ *		**PACKET_BROADCAST**
+ *			Send packet to all.
+ *		**PACKET_MULTICAST**
+ *			Send packet to group.
+ *		**PACKET_OTHERHOST**
+ *			Send packet to someone else.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ *	Description
+ *		Check whether *skb* is a descendant of the cgroup2 held by
+ *		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *	Return
+ *		The return value depends on the result of the test, and can be:
+ *
+ *		* 0, if the *skb* failed the cgroup2 descendant test.
+ *		* 1, if the *skb* succeeded the cgroup2 descendant test.
+ *		* A negative error code, if an error occurred.
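The incremental computation performed by **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ () above, and fed by **bpf_csum_diff**\ (), boils down to the RFC 1624 update equation HC' = ~(~HC + ~m + m'). A short sketch of that arithmetic (illustrative only; the kernel implementation differs in form):

::

    def ones_add(a, b):
        # 16-bit one's-complement addition, folding the carry back in.
        s = a + b
        return (s & 0xffff) + (s >> 16)

    def csum_update(hc, old, new):
        # RFC 1624, eqn. 3: HC' = ~(~HC + ~m + m').
        return ~ones_add(ones_add(~hc & 0xffff, ~old & 0xffff), new) & 0xffff

    # Checksum over [0x1234, 0x5678] is 0x9753; replacing 0x5678 with
    # 0x9abc must yield the checksum of [0x1234, 0x9abc], i.e. 0x530f.
    assert csum_update(0x9753, 0x5678, 0x9abc) == 0x530f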
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * 		not set, in particular if the hash was cleared due to mangling,
+ * 		recompute this hash. Later accesses to the hash can be done
+ * 		directly with *skb*\ **->hash**.
+ *
+ * 		Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * 		protocol with **bpf_skb_change_proto**\ (), or calling
+ * 		**bpf_skb_store_bytes**\ () with the
+ * 		**BPF_F_INVALIDATE_HASH** flag are actions susceptible to clear
+ * 		the hash and to trigger a new computation for the next call to
+ * 		**bpf_get_hash_recalc**\ ().
+ * 	Return
+ * 		The 32-bit hash.
 *
 * u64 bpf_get_current_task(void)
- *     Returns current task_struct
- *     Return: current
- *
- * int bpf_probe_write_user(void *dst, void *src, int len)
- *     safely attempt to write to a location
- *     @dst: destination address in userspace
- *     @src: source address on stack
- *     @len: number of bytes to copy
- *     Return: 0 on success or negative error
- *
- * int bpf_current_task_under_cgroup(map, index)
- *     Check cgroup2 membership of current task
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 current failed the cgroup2 descendant test
- *       == 1 current succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * int bpf_skb_change_tail(skb, len, flags)
- *     The helper will resize the skb to the given new size, to be used f.e.
- *     with control messages.
- *     @skb: pointer to skb
- *     @len: new skb length
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_pull_data(skb, len)
- *     The helper will pull in non-linear data in case the skb is non-linear
- *     and not all of len are part of the linear section. Only needed for
- *     read/write with direct packet access.
- *     @skb: pointer to skb
- *     @len: len to make read/writeable
- *     Return: 0 on success or negative error
- *
- * s64 bpf_csum_update(skb, csum)
- *     Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
- *     @skb: pointer to skb
- *     @csum: csum to add
- *     Return: csum on success or negative error
- *
- * void bpf_set_hash_invalid(skb)
- *     Invalidate current skb->hash.
- *     @skb: pointer to skb
- *
- * int bpf_get_numa_node_id()
- *     Return: Id of current NUMA node.
- *
- * int bpf_skb_change_head()
- *     Grows headroom of skb and adjusts MAC header offset accordingly.
- *     Will extends/reallocae as required automatically.
- *     May change skb data pointer and will thus invalidate any check
- *     performed for direct packet access.
- *     @skb: pointer to skb
- *     @len: length of header to be pushed in front
- *     @flags: Flags (unused for now)
- *     Return: 0 on success or negative error
- *
- * int bpf_xdp_adjust_head(xdp_md, delta)
- *     Adjust the xdp_md.data by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data
- *     Return: 0 on success or negative on error
+ * 	Return
+ * 		A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * 	Description
+ * 		Attempt in a safe way to write *len* bytes from the buffer
+ * 		*src* to *dst* in memory. It only works for threads that are in
+ * 		user context, and *dst* must be a valid user space address.
+ *
+ * 		This helper should not be used to implement any kind of
+ * 		security mechanism because of TOC-TOU attacks, but rather to
+ * 		debug, divert, and manipulate execution of semi-cooperative
+ * 		processes.
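+ *
+ * 		For example, one could overwrite a variable in the traced
+ * 		process (illustrative sketch only; *user_flag_addr* stands for
+ * 		an address obtained elsewhere, e.g. from a kprobe argument, and
+ * 		is not part of this patch):
+ *
+ * 		::
+ *
+ * 			int val = 1;
+ *
+ * 			// Flip a flag in the semi-cooperative process being
+ * 			// traced, at a user space address resolved earlier.
+ * 			bpf_probe_write_user(user_flag_addr, &val, sizeof(val));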
+ *
+ * 		Keep in mind that this feature is meant for experiments, and it
+ * 		has a risk of crashing the system and running programs.
+ * 		Therefore, when an eBPF program using this helper is attached,
+ * 		a warning including PID and process name is printed to kernel
+ * 		logs.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * 	Description
+ * 		Check whether the probe is being run in the context of a given
+ * 		subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * 	Return
+ * 		The return value depends on the result of the test, and can be:
+ *
+ * 		* 0, if current task does not belong to the cgroup2.
+ * 		* 1, if current task belongs to the cgroup2.
+ * 		* A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * 	Description
+ * 		Resize (trim or grow) the packet associated to *skb* to the
+ * 		new *len*. The *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		The basic idea is that the helper performs the needed work to
+ * 		change the size of the packet, then the eBPF program rewrites
+ * 		the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * 		**bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
+ * 		and others. This helper is a slow path utility intended for
+ * 		replies with control messages, and because it is targeted for
+ * 		the slow path, the helper itself can afford to be slow: it
+ * 		implicitly linearizes, unclones and drops offloads from the
+ * 		*skb*.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * 	Description
+ * 		Pull in non-linear data in case the *skb* is non-linear and not
+ * 		all of *len* are part of the linear section. Make *len* bytes
+ * 		from *skb* readable and writable. If a zero value is passed for
+ * 		*len*, then the whole length of the *skb* is pulled.
+ *
+ * 		This helper is only needed for reading and writing with direct
+ * 		packet access.
+ *
+ * 		For direct packet access, testing that offsets to access
+ * 		are within packet boundaries (test on *skb*\ **->data_end**) is
+ * 		susceptible to fail if offsets are invalid, or if the requested
+ * 		data is in non-linear parts of the *skb*. On failure the
+ * 		program can just bail out, or in the case of a non-linear
+ * 		buffer, use a helper to make the data available. The
+ * 		**bpf_skb_load_bytes**\ () helper is a first solution to access
+ * 		the data. Another one consists in using **bpf_skb_pull_data**\
+ * 		() to pull in the non-linear parts once, then retesting and
+ * 		eventually accessing the data.
+ *
+ * 		At the same time, this also makes sure the *skb* is uncloned,
+ * 		which is a necessary condition for direct write. As this needs
+ * 		to be an invariant for the write part only, the verifier
+ * 		detects writes and adds a prologue that is calling
+ * 		**bpf_skb_pull_data**\ () to effectively unclone the *skb* from
+ * 		the very beginning in case it is indeed cloned.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * 	Description
+ * 		Add the checksum *csum* into *skb*\ **->csum** in case the
+ * 		driver has supplied a checksum for the entire packet into that
+ * 		field. Return an error otherwise. This helper is intended to be
+ * 		used in combination with **bpf_csum_diff**\ (), in particular
+ * 		when the checksum needs to be updated after data has been
+ * 		written into the packet through direct packet access.
+ * 	Return
+ * 		The checksum on success, or a negative error code in case of
+ * 		failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * 	Description
+ * 		Invalidate the current *skb*\ **->hash**. It can be used after
+ * 		mangling on headers through direct packet access, in order to
+ * 		indicate that the hash is outdated and to trigger a
+ * 		recalculation the next time the kernel tries to access this
+ * 		hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * 	Description
+ * 		Return the id of the current NUMA node. The primary use case
+ * 		for this helper is the selection of sockets for the local NUMA
+ * 		node, when the program is attached to sockets using the
+ * 		**SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * 		but the helper is also available to other eBPF program types,
+ * 		similarly to **bpf_get_smp_processor_id**\ ().
+ * 	Return
+ * 		The id of the current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * 	Description
+ * 		Grow the headroom of the packet associated to *skb* and adjust
+ * 		the offset of the MAC header accordingly, adding *len* bytes of
+ * 		space. It automatically extends and reallocates memory as
+ * 		required.
+ *
+ * 		This helper can be used on a layer 3 *skb* to push a MAC header
+ * 		for redirection into a layer 2 device.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * 	Description
+ * 		Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * 		it is possible to use a negative value for *delta*. This helper
+ * 		can be used to prepare the packet for pushing or popping
+ * 		headers.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
 *
 * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
- *     Copy a NUL terminated string from unsafe address. In case the string
- *     length is smaller than size, the target is not padded with further NUL
- *     bytes. In case the string length is larger than size, just count-1
- *     bytes are copied and the last byte is set to NUL.
- * @dst: destination address - * @size: maximum number of bytes to copy, including the trailing NUL - * @unsafe_ptr: unsafe address - * Return: - * > 0 length of the string including the trailing NUL on success - * < 0 error - * - * u64 bpf_get_socket_cookie(skb) - * Get the cookie for the socket stored inside sk_buff. - * @skb: pointer to skb - * Return: 8 Bytes non-decreasing number on success or 0 if the socket - * field is missing inside sk_buff - * - * u32 bpf_get_socket_uid(skb) - * Get the owner uid of the socket stored inside sk_buff. - * @skb: pointer to skb - * Return: uid of the socket owner on success or overflowuid if failed. - * - * u32 bpf_set_hash(skb, hash) - * Set full skb->hash. - * @skb: pointer to skb - * @hash: hash to set - * - * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) - * Calls setsockopt. Not all opts are available, only those with - * integer optvals plus TCP_CONGESTION. - * Supported levels: SOL_SOCKET and IPPROTO_TCP - * @bpf_socket: pointer to bpf_socket - * @level: SOL_SOCKET or IPPROTO_TCP - * @optname: option name - * @optval: pointer to option value - * @optlen: length of optval in bytes - * Return: 0 or negative error - * - * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) - * Calls getsockopt. Not all opts are available. - * Supported levels: IPPROTO_TCP - * @bpf_socket: pointer to bpf_socket - * @level: IPPROTO_TCP - * @optname: option name - * @optval: pointer to option value - * @optlen: length of optval in bytes - * Return: 0 or negative error - * - * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) - * Set callback flags for sock_ops - * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct - * @flags: flags value - * Return: 0 for no error - * -EINVAL if there is no full tcp socket - * bits in flags that are not supported by current kernel - * - * int bpf_skb_adjust_room(skb, len_diff, mode, flags) - * Grow or shrink room in sk_buff. - * @skb: pointer to skb - * @len_diff: (signed) amount of room to grow/shrink - * @mode: operation mode (enum bpf_adj_room_mode) - * @flags: reserved for future use - * Return: 0 on success or negative error code - * - * int bpf_sk_redirect_map(map, key, flags) - * Redirect skb to a sock in map using key as a lookup key for the - * sock in map. 
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_sock_map_update(skops, map, key, flags)
- *     @skops: pointer to bpf_sock_ops
- *     @map: pointer to sockmap to update
- *     @key: key to insert/update sock in map
- *     @flags: same flags as map update elem
- *
- * int bpf_xdp_adjust_meta(xdp_md, delta)
- *     Adjust the xdp_md.data_meta by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data_meta
- *     Return: 0 on success or negative on error
- *
- * int bpf_perf_event_read_value(map, flags, buf, buf_size)
- *     read perf event counter value and perf event enabled/running time
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return: 0 on success or negative error code
- *
- * int bpf_perf_prog_read_value(ctx, buf, buf_size)
- *     read perf prog attached perf event counter and enabled/running time
- *     @ctx: pointer to ctx
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return : 0 on success or negative error code
- *
- * int bpf_override_return(pt_regs, rc)
- *     @pt_regs: pointer to struct pt_regs
- *     @rc: the return value to set
- *
- * int bpf_msg_redirect_map(map, key, flags)
- *     Redirect msg to a sock in map using key as a lookup key for the
- *     sock in map.
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_bind(ctx, addr, addr_len)
- *     Bind socket to address. Only binding to IP is supported, no port can be
- *     set in addr.
- *     @ctx: pointer to context of type bpf_sock_addr
- *     @addr: pointer to struct sockaddr to bind socket to
- *     @addr_len: length of sockaddr structure
- *     Return: 0 on success or negative error code
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe address
+ * 		*unsafe_ptr* to *dst*. The *size* should include the
+ * 		terminating NUL byte. In case the string length is smaller than
+ * 		*size*, the target is not padded with further NUL bytes. If the
+ * 		string length is larger than *size*, just *size*-1 bytes are
+ * 		copied and the last byte is set to NUL.
+ *
+ * 		On success, the length of the copied string is returned. This
+ * 		makes this helper useful in tracing programs for reading
+ * 		strings, and more importantly to get their length at runtime.
+ * 		See the following snippet:
+ *
+ * 		::
+ *
+ * 			SEC("kprobe/sys_open")
+ * 			void bpf_sys_open(struct pt_regs *ctx)
+ * 			{
+ * 				char buf[PATHLEN]; // PATHLEN is defined to 256
+ * 				int res = bpf_probe_read_str(buf, sizeof(buf),
+ * 							     ctx->di);
+ *
+ * 				// Consume buf, for example push it to
+ * 				// userspace via bpf_perf_event_output(); we
+ * 				// can use res (the string length) as event
+ * 				// size, after checking its boundaries.
+ * 			}
+ *
+ * 		In comparison, using the **bpf_probe_read**\ () helper here
+ * 		instead to read the string would require estimating the length
+ * 		at compile time, and would often result in copying more memory
+ * 		than necessary.
+ *
+ * 		Another use case is when parsing individual process arguments
+ * 		or individual environment variables by navigating *current*\
+ * 		**->mm->arg_start** and *current*\ **->mm->env_start**: using
+ * 		this helper and the return value, one can quickly iterate at
+ * 		the right offset of the memory area.
+ * 	Return
+ * 		On success, the strictly positive length of the string,
+ * 		including the trailing NUL character. On error, a negative
+ * 		value.
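+ *
+ * 		A sketch of that iteration pattern (illustrative only: *buf*,
+ * 		*arg_start*, *arg_end* and **MAX_ARGS** are assumed to be set
+ * 		up by the surrounding program, they are not part of this
+ * 		patch):
+ *
+ * 		::
+ *
+ * 			// Walk the NUL-separated argv strings, advancing by
+ * 			// the returned length, which includes the trailing NUL.
+ * 			unsigned long arg = arg_start;
+ * 			int i, res;
+ *
+ * 			for (i = 0; i < MAX_ARGS && arg < arg_end; i++) {
+ * 				res = bpf_probe_read_str(buf, sizeof(buf),
+ * 							 (void *)arg);
+ * 				if (res <= 0)
+ * 					break;
+ * 				// Consume buf here, then move to next string.
+ * 				arg += res;
+ * 			}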
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * 	Description
+ * 		If the **struct sk_buff** pointed by *skb* has a known socket,
+ * 		retrieve the cookie (generated by the kernel) of this socket.
+ * 		If no cookie has been set yet, generate a new cookie. Once
+ * 		generated, the socket cookie remains stable for the life of the
+ * 		socket. This helper can be useful for monitoring per socket
+ * 		networking traffic statistics as it provides a unique socket
+ * 		identifier per namespace.
+ * 	Return
+ * 		An 8-byte long non-decreasing number on success, or 0 if the
+ * 		socket field is missing inside *skb*.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * 	Return
+ * 		The owner UID of the socket associated to *skb*. If the socket
+ * 		is **NULL**, or if it is not a full socket (i.e. if it is a
+ * 		time-wait or a request socket instead), **overflowuid** value
+ * 		is returned (note that **overflowuid** might also be the actual
+ * 		UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * 	Description
+ * 		Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * 		to value *hash*.
+ * 	Return
+ * 		0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * 	Description
+ * 		Emulate a call to **setsockopt()** on the socket associated to
+ * 		*bpf_socket*, which must be a full socket. The *level* at
+ * 		which the option resides and the name *optname* of the option
+ * 		must be specified, see **setsockopt(2)** for more information.
+ * 		The option value of length *optlen* is pointed by *optval*.
+ *
+ * 		This helper actually implements a subset of **setsockopt()**.
+ * 		It supports the following *level*\ s:
+ *
+ * 		* **SOL_SOCKET**, which supports the following *optname*\ s:
+ * 		  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * 		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * 		* **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * 		  **TCP_BPF_SNDCWND_CLAMP**.
+ * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ * 	Description
+ * 		Grow or shrink the room for data in the packet associated to
+ * 		*skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * 		There is a single supported mode at this time:
+ *
+ * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * 		  (room space is added or removed below the layer 3 header).
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * 	Description
+ * 		Redirect the packet to the endpoint referenced by *map* at
+ * 		index *key*. Depending on its type, this *map* can contain
+ * 		references to net devices (for forwarding packets through other
+ * 		ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * 		but this is only implemented for native XDP (with driver
+ * 		support) as of this writing).
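+ *
+ * 		For instance, a minimal XDP program forwarding every frame
+ * 		through a device map (sketch only: the *tx_port* map of type
+ * 		**BPF_MAP_TYPE_DEVMAP** is an assumption, not part of this
+ * 		patch):
+ *
+ * 		::
+ *
+ * 			SEC("xdp")
+ * 			int xdp_redirect(struct xdp_md *ctx)
+ * 			{
+ * 				// Transmit the frame through the net device
+ * 				// stored at index 0 of tx_port.
+ * 				return bpf_redirect_map(&tx_port, 0, 0);
+ * 			}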
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		When used to redirect packets to net devices, this helper
+ * 		provides a significant performance increase over
+ * 		**bpf_redirect**\ (). This is due to various implementation
+ * 		details of the underlying mechanisms, one of which is the fact
+ * 		that **bpf_redirect_map**\ () tries to send packets as a "bulk"
+ * 		to the device.
+ * 	Return
+ * 		**XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * 	Description
+ * 		Redirect the packet to the socket referenced by *map* (of type
+ * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * 		egress interfaces can be used for redirection. The
+ * 		**BPF_F_INGRESS** value in *flags* is used to make the
+ * 		distinction (ingress path is selected if the flag is present,
+ * 		egress path otherwise). This is the only flag supported for now.
+ * 	Return
+ * 		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * 	Description
+ * 		Add an entry to, or update a *map* referencing sockets. The
+ * 		*skops* is used as a new value for the entry associated to
+ * 		*key*. *flags* is one of:
+ *
+ * 		**BPF_NOEXIST**
+ * 			The entry for *key* must not exist in the map.
+ * 		**BPF_EXIST**
+ * 			The entry for *key* must already exist in the map.
+ * 		**BPF_ANY**
+ * 			No condition on the existence of the entry for *key*.
+ *
+ * 		If the *map* has eBPF programs (parser and verdict), those will
+ * 		be inherited by the socket being added. If the socket is
+ * 		already attached to eBPF programs, this results in an error.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * 	Description
+ * 		Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * 		*delta* (which can be positive or negative). Note that this
+ * 		operation modifies the address stored in *xdp_md*\ **->data**,
+ * 		so the latter must be loaded only after the helper has been
+ * 		called.
+ *
+ * 		The use of *xdp_md*\ **->data_meta** is optional and programs
+ * 		are not required to use it. The rationale is that when the
+ * 		packet is processed with XDP (e.g. as DoS filter), it is
+ * 		possible to push further meta data along with it before passing
+ * 		to the stack, and to give the guarantee that an ingress eBPF
+ * 		program attached as a TC classifier on the same device can pick
+ * 		this up for further post-processing. Since TC works with socket
+ * 		buffers, it remains possible to set from XDP the **mark** or
+ * 		**priority** fields, or other fields for the socket buffer.
+ * 		Having this scratch space generic and programmable allows for
+ * 		more flexibility as the user is free to store whatever meta
+ * 		data they need.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * 	Description
+ * 		Read the value of a perf event counter, and store it into *buf*
+ * 		of size *buf_size*. This helper relies on a *map* of type
+ * 		**BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * 		counter is selected when *map* is updated with perf event file
+ * 		descriptors. The *map* is an array whose size is the number of
+ * 		available CPUs, and each cell contains a value relative to one
+ * 		CPU. The value to retrieve is indicated by *flags*, that
+ * 		contains the index of the CPU to look up, masked with
+ * 		**BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * 		**BPF_F_CURRENT_CPU** to indicate that the value for the
+ * 		current CPU should be retrieved.
+ *
+ * 		This helper behaves in a way close to
+ * 		**bpf_perf_event_read**\ () helper, save that instead of
+ * 		just returning the value observed, it fills the *buf*
+ * 		structure. This allows for additional data to be retrieved: in
+ * 		particular, the enabled and running times (in *buf*\
+ * 		**->enabled** and *buf*\ **->running**, respectively) are
+ * 		copied. In general, **bpf_perf_event_read_value**\ () is
+ * 		recommended over **bpf_perf_event_read**\ (), which has some
+ * 		ABI issues and provides fewer functionalities.
+ *
+ * 		These values are interesting, because hardware PMU (Performance
+ * 		Monitoring Unit) counters are limited resources. When there are
+ * 		more PMU based perf events opened than available counters, the
+ * 		kernel will multiplex these events so each event gets a certain
+ * 		percentage (but not all) of the PMU time. When multiplexing
+ * 		happens, the number of samples or the counter value will not
+ * 		reflect what it would be without multiplexing. This makes
+ * 		comparison between different runs difficult. Typically, the
+ * 		counter value should be normalized before comparing to other
+ * 		experiments. The usual normalization is done as follows.
+ *
+ * 		::
+ *
+ * 			normalized_counter = counter * t_enabled / t_running
+ *
+ * 		Where t_enabled is the time enabled for the event and
+ * 		t_running is the time running for the event since the last
+ * 		normalization. The enabled and running times are accumulated
+ * 		since the perf event open. To compute the scaling factor
+ * 		between two invocations of an eBPF program, users can use the
+ * 		CPU id as the key (which is typical for perf array usage model)
+ * 		to remember the previous value and do the calculation inside
+ * 		the eBPF program.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * 	Description
+ * 		For an eBPF program attached to a perf event, retrieve the
+ * 		value of the event counter associated to *ctx* and store it in
+ * 		the structure pointed by *buf* and of size *buf_size*. Enabled
+ * 		and running times are also stored in the structure (see
+ * 		description of helper **bpf_perf_event_read_value**\ () for
+ * 		more details).
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * 	Description
+ * 		Emulate a call to **getsockopt()** on the socket associated to
+ * 		*bpf_socket*, which must be a full socket. The *level* at
+ * 		which the option resides and the name *optname* of the option
+ * 		must be specified, see **getsockopt(2)** for more information.
+ * 		The retrieved value is stored in the structure pointed by
+ * 		*optval* and of length *optlen*.
+ *
+ * 		This helper actually implements a subset of **getsockopt()**.
+ * 		It supports the following *level*\ s:
+ *
+ * 		* **IPPROTO_TCP**, which supports *optname*
+ * 		  **TCP_CONGESTION**.
+ * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * 	Description
+ * 		Used for error injection, this helper uses kprobes to override
+ * 		the return value of the probed function, and to set it to *rc*.
+ * 		The first argument is the context *regs* on which the kprobe
+ * 		works.
+ *
+ * 		This helper works by setting the PC (program counter)
+ * 		to an override function which is run in place of the original
+ * 		probed function. This means the probed function is not run at
+ * 		all. The replacement function just returns with the required
+ * 		value.
+ *
+ * 		This helper has security implications, and thus is subject to
+ * 		restrictions. It is only available if the kernel was compiled
+ * 		with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * 		option, and in this case it only works on functions tagged with
+ * 		**ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * 		Also, the helper is only available for the architectures having
+ * 		the **CONFIG_FUNCTION_ERROR_INJECTION** option. As of this
+ * 		writing, the x86 architecture is the only one to support this
+ * 		feature.
+ * 	Return
+ * 		0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval)
+ * 	Description
+ * 		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * 		for the full TCP socket associated to *bpf_sock* to
+ * 		*argval*.
+ *
+ * 		The primary use of this field is to determine if there should
+ * 		be calls to eBPF programs of type
+ * 		**BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * 		code. A program of the same type can change its value, per
+ * 		connection and as necessary, when the connection is
+ * 		established. This field is directly accessible for reading, but
+ * 		this helper must be used for updates in order to return an
+ * 		error if an eBPF program tries to set a callback that is not
+ * 		supported in the current kernel.
+ *
+ * 		The supported callback values that *argval* can combine are:
+ *
+ * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * 		Here are some examples of where one could call such an eBPF
+ * 		program:
+ *
+ * 		* When RTO fires.
+ * 		* When a packet is retransmitted.
+ * 		* When the connection terminates.
+ * 		* When a packet is sent.
+ * 		* When a packet is received.
+ * 	Return
+ * 		Code **-EINVAL** if the socket is not a full TCP socket;
+ * 		otherwise, a positive number containing the bits that could not
+ * 		be set is returned (which comes down to 0 if all bits were set
+ * 		as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * 	Description
+ * 		This helper is used in programs implementing policies at the
+ * 		socket level. If the message *msg* is allowed to pass (i.e. if
+ * 		the verdict eBPF program returns **SK_PASS**), redirect it to
+ * 		the socket referenced by *map* (of type
+ * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * 		egress interfaces can be used for redirection. The
+ * 		**BPF_F_INGRESS** value in *flags* is used to make the
+ * 		distinction (ingress path is selected if the flag is present,
+ * 		egress path otherwise). This is the only flag supported for now.
+ * 	Return
+ * 		**SK_PASS** on success, or **SK_DROP** on error.
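+ *
+ * 		A minimal verdict program built around this helper could look
+ * 		as follows (sketch only: the *sock_map* map of type
+ * 		**BPF_MAP_TYPE_SOCKMAP** is an assumption, not part of this
+ * 		patch):
+ *
+ * 		::
+ *
+ * 			SEC("sk_msg")
+ * 			int msg_verdict(struct sk_msg_md *msg)
+ * 			{
+ * 				// Redirect every message to the socket stored
+ * 				// at index 0, on the ingress path.
+ * 				return bpf_msg_redirect_map(msg, &sock_map, 0,
+ * 							    BPF_F_INGRESS);
+ * 			}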
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * 	Description
+ * 		For socket policies, apply the verdict of the eBPF program to
+ * 		the next *bytes* (number of bytes) of message *msg*.
+ *
+ * 		For example, this helper can be used in the following cases:
+ *
+ * 		* A single **sendmsg**\ () or **sendfile**\ () system call
+ * 		  contains multiple logical messages that the eBPF program is
+ * 		  supposed to read and for which it should apply a verdict.
+ * 		* An eBPF program only cares about reading the first *bytes*
+ * 		  of a *msg*. If the message has a large payload, then setting
+ * 		  up and calling the eBPF program repeatedly for all bytes,
+ * 		  even though the verdict is already known, would create
+ * 		  unnecessary overhead.
+ *
+ * 		When called from within an eBPF program, the helper sets a
+ * 		counter internal to the BPF infrastructure, that is used to
+ * 		apply the last verdict to the next *bytes*. If *bytes* is
+ * 		smaller than the current data being processed from a
+ * 		**sendmsg**\ () or **sendfile**\ () system call, the first
+ * 		*bytes* will be sent and the eBPF program will be re-run with
+ * 		the pointer for start of data pointing to byte number *bytes*
+ * 		**+ 1**. If *bytes* is larger than the current data being
+ * 		processed, then the eBPF verdict will be applied to multiple
+ * 		**sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * 		consumed.
+ *
+ * 		Note that if a socket closes with the internal counter holding
+ * 		a non-zero value, this is not a problem because data is not
+ * 		being buffered for *bytes* and is sent as it is received.
+ * 	Return
+ * 		0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * 	Description
+ * 		For socket policies, prevent the execution of the verdict eBPF
+ * 		program for message *msg* until *bytes* (number of bytes) have
+ * 		been accumulated.
+ *
+ * 		This can be used when one needs a specific number of bytes
+ * 		before a verdict can be assigned, even if the data spans
+ * 		multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * 		case would be a user calling **sendmsg**\ () repeatedly with
+ * 		1-byte long message segments. Obviously, this is bad for
+ * 		performance, but it is still valid. If the eBPF program needs
+ * 		*bytes* bytes to validate a header, this helper can be used to
+ * 		prevent the eBPF program from being called again until *bytes*
+ * 		have been accumulated.
+ * 	Return
+ * 		0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * 	Description
+ * 		For socket policies, pull in non-linear data from user space
+ * 		for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * 		**->data_end** to *start* and *end* byte offsets into *msg*,
+ * 		respectively.
+ *
+ * 		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * 		*msg* it can only parse data that the (**data**, **data_end**)
+ * 		pointers have already consumed. For **sendmsg**\ () hooks this
+ * 		is likely the first scatterlist element. But for calls relying
+ * 		on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * 		be the range (**0**, **0**) because the data is shared with
+ * 		user space and by default the objective is to avoid allowing
+ * 		user space to modify data while (or after) eBPF verdict is
+ * 		being decided. This helper can be used to pull in data and to
+ * 		set the start and end pointers to given values. Data will be
+ * 		copied if necessary (i.e. if data was not linear and if start
+ * 		and end pointers do not point to the same chunk).
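+ *
+ * 		For instance, to parse a *hdr_len* byte header before issuing
+ * 		a verdict (sketch only; *hdr_len* is an assumed constant, not
+ * 		part of this patch):
+ *
+ * 		::
+ *
+ * 			// Make the header readable, then bounds-check before
+ * 			// parsing it through msg->data / msg->data_end.
+ * 			if (bpf_msg_pull_data(msg, 0, hdr_len, 0))
+ * 				return SK_DROP;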
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
+ * 	Description
+ * 		Bind the socket associated to *ctx* to the address pointed by
+ * 		*addr*, of length *addr_len*. This allows for making outgoing
+ * 		connections from the desired IP address, which can be useful
+ * 		for example when all processes inside a cgroup should use a
+ * 		single IP address on a host that has multiple IP addresses
+ * 		configured.
+ *
+ * 		This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * 		domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * 		**AF_INET6**). Looking for a free port to bind to can be
+ * 		expensive, therefore binding to port is not permitted by the
+ * 		helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * 		must be set to zero.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * 	Description
+ * 		Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * 		only possible to shrink the packet as of this writing,
+ * 		therefore *delta* must be a negative integer.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * 	Description
+ * 		Retrieve the XFRM state (IP transform framework, see also
+ * 		**ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * 		The retrieved value is stored in the **struct bpf_xfrm_state**
+ * 		pointed by *xfrm_state* and of length *size*.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		This helper is available only if the kernel was compiled with
+ * 		the **CONFIG_XFRM** configuration option.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
 */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -821,7 +1833,9 @@ union bpf_attr {
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
 	FN(msg_pull_data),		\
-	FN(bind),
+	FN(bind),			\
+	FN(xdp_adjust_tail),		\
+	FN(skb_get_xfrm_state),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -927,6 +1941,19 @@ struct bpf_tunnel_key {
 	__u32 tunnel_label;
 };
 
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+	__u32 reqid;
+	__u32 spi;	/* Stored in network byte order */
+	__u16 family;
+	union {
+		__u32 remote_ipv4;	/* Stored in network byte order */
+		__u32 remote_ipv6[4];	/* Stored in network byte order */
+	};
+};
+
 /* Generic BPF return codes which all BPF program types may support.
 * The values are binary compatible with their TC_ACT_* counter-part to
 * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -1017,6 +2044,7 @@ struct bpf_prog_info {
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
 	__u32 ifindex;
+	__u32 gpl_compatible:1;
 	__u64 netns_dev;
 	__u64 netns_ino;
 } __attribute__((aligned(8)));
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
new file mode 100644
index 000000000000..bcb56ee47014
--- /dev/null
+++ b/tools/include/uapi/linux/btf.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC	0xeB9F
+#define BTF_VERSION	1
+
+struct btf_header {
+	__u16	magic;
+	__u8	version;
+	__u8	flags;
+
+	__u32	parent_label;
+	__u32	parent_name;
+
+	/* All offsets are in bytes relative to the end of this header */
+	__u32	label_off;	/* offset of label section	*/
+	__u32	object_off;	/* offset of data object section */
+	__u32	func_off;	/* offset of function section	*/
+	__u32	type_off;	/* offset of type section	*/
+	__u32	str_off;	/* offset of string section	*/
+	__u32	str_len;	/* length of string section	*/
+};
+
+/* Max # of type identifiers */
+#define BTF_MAX_TYPE	0x7fffffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET	0x7fffffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN	0xffff
+
+/* The type id is referring to a parent BTF */
+#define BTF_TYPE_PARENT(id)	(((id) >> 31) & 0x1)
+#define BTF_TYPE_ID(id)	((id) & BTF_MAX_TYPE)
+
+/* String is in the ELF string section */
+#define BTF_STR_TBL_ELF_ID(ref)	(((ref) >> 31) & 0x1)
+#define BTF_STR_OFFSET(ref)	((ref) & BTF_MAX_NAME_OFFSET)
+
+struct btf_type {
+	__u32 name_off;
+	/* "info" bits arrangement
+	 * bits  0-15: vlen (e.g. # of struct's members)
+	 * bits 16-23: unused
+	 * bits 24-28: kind (e.g. int, ptr, array, etc.)
+	 * bits 29-30: unused
+	 * bit     31: root
+	 */
+	__u32 info;
+	/* "size" is used by INT, ENUM, STRUCT and UNION.
+	 * "size" tells the size of the type it is describing.
+	 *
+	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+	 * "type" is a type_id referring to another type.
+	 */
+	union {
+		__u32 size;
+		__u32 type;
+	};
+};
+
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
+#define BTF_INFO_ISROOT(info)	(!!(((info) >> 24) & 0x80))
+#define BTF_INFO_VLEN(info)	((info) & 0xffff)
+
+#define BTF_KIND_UNKN		0	/* Unknown	*/
+#define BTF_KIND_INT		1	/* Integer	*/
+#define BTF_KIND_PTR		2	/* Pointer	*/
+#define BTF_KIND_ARRAY		3	/* Array	*/
+#define BTF_KIND_STRUCT		4	/* Struct	*/
+#define BTF_KIND_UNION		5	/* Union	*/
+#define BTF_KIND_ENUM		6	/* Enumeration	*/
+#define BTF_KIND_FWD		7	/* Forward	*/
+#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
+#define BTF_KIND_VOLATILE	9	/* Volatile	*/
+#define BTF_KIND_CONST		10	/* Const	*/
+#define BTF_KIND_RESTRICT	11	/* Restrict	*/
+#define BTF_KIND_MAX		11
+#define NR_BTF_KINDS		12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32-bit arrangement:
+ */
+#define BTF_INT_ENCODING(VAL)	(((VAL) & 0xff000000) >> 24)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
+#define BTF_INT_BITS(VAL)	((VAL) & 0x0000ffff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED	0x1
+#define BTF_INT_CHAR	0x2
+#define BTF_INT_BOOL	0x4
+#define BTF_INT_VARARGS	0x8
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+	__u32	name_off;
+	__s32	val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+	__u32	type;
+	__u32	index_type;
+	__u32	nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member".  The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+	__u32	name_off;
+	__u32	type;
+	__u32	offset;	/* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 64c679d67109..6070e655042d 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index acbb3f8b3bec..76b36cc16e7f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -73,43 +73,76 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 	return syscall(__NR_bpf, cmd, attr, size);
 }
 
-int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
-			int key_size, int value_size, int max_entries,
-			__u32 map_flags, int node)
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 {
-	__u32 name_len = name ? strlen(name) : 0;
+	__u32 name_len = create_attr->name ?
strlen(create_attr->name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.max_entries = max_entries; - attr.map_flags = map_flags; - memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + attr.map_type = create_attr->map_type; + attr.key_size = create_attr->key_size; + attr.value_size = create_attr->value_size; + attr.max_entries = create_attr->max_entries; + attr.map_flags = create_attr->map_flags; + memcpy(attr.map_name, create_attr->name, + min(name_len, BPF_OBJ_NAME_LEN - 1)); + attr.numa_node = create_attr->numa_node; + attr.btf_fd = create_attr->btf_fd; + attr.btf_key_id = create_attr->btf_key_id; + attr.btf_value_id = create_attr->btf_value_id; + + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; if (node >= 0) { - attr.map_flags |= BPF_F_NUMA_NODE; - attr.numa_node = node; + map_attr.numa_node = node; + map_attr.map_flags |= BPF_F_NUMA_NODE; } - return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + return bpf_create_map_xattr(&map_attr); } int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags) { - return bpf_create_map_node(map_type, NULL, key_size, value_size, - max_entries, map_flags, -1); + struct bpf_create_map_attr map_attr = {}; + + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); } int bpf_create_map_name(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags) { - return bpf_create_map_node(map_type, name, key_size, value_size, - max_entries, map_flags, -1); + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); } int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, @@ -573,3 +606,28 @@ cleanup: close(sock); return ret; } + +int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, + bool do_log) +{ + union bpf_attr attr = {}; + int fd; + + attr.btf = ptr_to_u64(btf); + attr.btf_size = btf_size; + +retry: + if (do_log && log_buf && log_buf_size) { + attr.btf_log_level = 1; + attr.btf_log_size = log_buf_size; + attr.btf_log_buf = ptr_to_u64(log_buf); + } + + fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); + if (fd == -1 && !do_log && log_buf && log_buf_size) { + do_log = true; + goto retry; + } + + return fd; +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 39f6a0d64a3b..553b11ad52b3 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,8 +24,23 @@ #define __BPF_BPF_H #include <linux/bpf.h> +#include <stdbool.h> #include <stddef.h> +struct bpf_create_map_attr { + const char *name; + enum bpf_map_type map_type; + __u32 map_flags; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 
numa_node; + __u32 btf_fd; + __u32 btf_key_id; + __u32 btf_value_id; +}; + +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node); @@ -87,4 +102,6 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); int bpf_raw_tracepoint_open(const char *name, int prog_fd); +int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, + bool do_log); #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c new file mode 100644 index 000000000000..2bac710e3194 --- /dev/null +++ b/tools/lib/bpf/btf.c @@ -0,0 +1,374 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <linux/err.h> +#include <linux/btf.h> +#include "btf.h" +#include "bpf.h" + +#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); } +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define min(a, b) ((a) < (b) ? (a) : (b)) + +#define BTF_MAX_NR_TYPES 65535 + +static struct btf_type btf_void; + +struct btf { + union { + struct btf_header *hdr; + void *data; + }; + struct btf_type **types; + const char *strings; + void *nohdr_data; + uint32_t nr_types; + uint32_t types_size; + uint32_t data_size; + int fd; +}; + +static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset) +{ + if (!BTF_STR_TBL_ELF_ID(offset) && + BTF_STR_OFFSET(offset) < btf->hdr->str_len) + return &btf->strings[BTF_STR_OFFSET(offset)]; + else + return NULL; +} + +static int btf_add_type(struct btf *btf, struct btf_type *t) +{ + if (btf->types_size - btf->nr_types < 2) { + struct btf_type **new_types; + u32 expand_by, new_size; + + if (btf->types_size == BTF_MAX_NR_TYPES) + return -E2BIG; + + expand_by = max(btf->types_size >> 2, 16); + new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); + + new_types = realloc(btf->types, sizeof(*new_types) * new_size); + if (!new_types) + return -ENOMEM; + + if (btf->nr_types == 0) + new_types[0] = &btf_void; + + btf->types = new_types; + btf->types_size = new_size; + } + + btf->types[++(btf->nr_types)] = t; + + return 0; +} + +static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) +{ + const struct btf_header *hdr = btf->hdr; + u32 meta_left; + + if (btf->data_size < sizeof(struct btf_header)) { + elog("BTF header not found\n"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + elog("Invalid BTF magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + elog("Unsupported BTF version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + elog("Unsupported BTF flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + meta_left = btf->data_size - sizeof(*hdr); + if (!meta_left) { + elog("BTF has no data\n"); + return -EINVAL; + } + + if (meta_left < hdr->type_off) { + elog("Invalid BTF type section offset:%u\n", hdr->type_off); + return -EINVAL; + } + + if (meta_left < hdr->str_off) { + elog("Invalid BTF string section offset:%u\n", hdr->str_off); + return -EINVAL; + } + + if (hdr->type_off >= hdr->str_off) { + elog("BTF type section offset >= string section offset. 
No type?\n");
+		return -EINVAL;
+	}
+
+	if (hdr->type_off & 0x03) {
+		elog("BTF type section is not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
+
+	btf->nohdr_data = btf->hdr + 1;
+
+	return 0;
+}
+
+static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+	const struct btf_header *hdr = btf->hdr;
+	const char *start = btf->nohdr_data + hdr->str_off;
+	const char *end = start + btf->hdr->str_len;
+
+	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+	    start[0] || end[-1]) {
+		elog("Invalid BTF string section\n");
+		return -EINVAL;
+	}
+
+	btf->strings = start;
+
+	return 0;
+}
+
+static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+	struct btf_header *hdr = btf->hdr;
+	void *nohdr_data = btf->nohdr_data;
+	void *next_type = nohdr_data + hdr->type_off;
+	void *end_type = nohdr_data + hdr->str_off;
+
+	while (next_type < end_type) {
+		struct btf_type *t = next_type;
+		uint16_t vlen = BTF_INFO_VLEN(t->info);
+		int err;
+
+		next_type += sizeof(*t);
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_INT:
+			next_type += sizeof(int);
+			break;
+		case BTF_KIND_ARRAY:
+			next_type += sizeof(struct btf_array);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			next_type += vlen * sizeof(struct btf_member);
+			break;
+		case BTF_KIND_ENUM:
+			next_type += vlen * sizeof(struct btf_enum);
+			break;
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_PTR:
+		case BTF_KIND_FWD:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+			break;
+		default:
+			elog("Unsupported BTF_KIND:%u\n",
+			     BTF_INFO_KIND(t->info));
+			return -EINVAL;
+		}
+
+		err = btf_add_type(btf, t);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static const struct btf_type *btf_type_by_id(const struct btf *btf,
+					     uint32_t type_id)
+{
+	if (type_id > btf->nr_types)
+		return NULL;
+
+	return btf->types[type_id];
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+	return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+	return !t || btf_type_is_void(t);
+}
+
+static int64_t btf_type_size(const struct btf_type *t)
+{
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+		return t->size;
+	case BTF_KIND_PTR:
+		return sizeof(void *);
+	default:
+		return -EINVAL;
+	}
+}
+
+#define MAX_RESOLVE_DEPTH 32
+
+int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id)
+{
+	const struct btf_array *array;
+	const struct btf_type *t;
+	uint32_t nelems = 1;
+	int64_t size = -1;
+	int i;
+
+	t = btf_type_by_id(btf, type_id);
+	for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
+	     i++) {
+		size = btf_type_size(t);
+		if (size >= 0)
+			break;
+
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+			type_id = t->type;
+			break;
+		case BTF_KIND_ARRAY:
+			array = (const struct btf_array *)(t + 1);
+			if (nelems && array->nelems > UINT32_MAX / nelems)
+				return -E2BIG;
+			nelems *= array->nelems;
+			type_id = array->type;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		t = btf_type_by_id(btf, type_id);
+	}
+
+	if (size < 0)
+		return -EINVAL;
+
+	if (nelems && size > UINT32_MAX / nelems)
+		return -E2BIG;
+
+	return nelems * size;
+}
+
+int32_t btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+	uint32_t i;
+
+	if (!strcmp(type_name, "void"))
+		return 0;
+
+	for (i = 1; i <= btf->nr_types; i++) {
+		const struct btf_type
*t = btf->types[i]; + const char *name = btf_name_by_offset(btf, t->name_off); + + if (name && !strcmp(type_name, name)) + return i; + } + + return -ENOENT; +} + +void btf__free(struct btf *btf) +{ + if (!btf) + return; + + if (btf->fd != -1) + close(btf->fd); + + free(btf->data); + free(btf->types); + free(btf); +} + +struct btf *btf__new(uint8_t *data, uint32_t size, + btf_print_fn_t err_log) +{ + uint32_t log_buf_size = 0; + char *log_buf = NULL; + struct btf *btf; + int err; + + btf = calloc(1, sizeof(struct btf)); + if (!btf) + return ERR_PTR(-ENOMEM); + + btf->fd = -1; + + if (err_log) { + log_buf = malloc(BPF_LOG_BUF_SIZE); + if (!log_buf) { + err = -ENOMEM; + goto done; + } + *log_buf = 0; + log_buf_size = BPF_LOG_BUF_SIZE; + } + + btf->data = malloc(size); + if (!btf->data) { + err = -ENOMEM; + goto done; + } + + memcpy(btf->data, data, size); + btf->data_size = size; + + btf->fd = bpf_load_btf(btf->data, btf->data_size, + log_buf, log_buf_size, false); + + if (btf->fd == -1) { + err = -errno; + elog("Error loading BTF: %s(%d)\n", strerror(errno), errno); + if (log_buf && *log_buf) + elog("%s\n", log_buf); + goto done; + } + + err = btf_parse_hdr(btf, err_log); + if (err) + goto done; + + err = btf_parse_str_sec(btf, err_log); + if (err) + goto done; + + err = btf_parse_type_sec(btf, err_log); + +done: + free(log_buf); + + if (err) { + btf__free(btf); + return ERR_PTR(err); + } + + return btf; +} + +int btf__fd(const struct btf *btf) +{ + return btf->fd; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h new file mode 100644 index 000000000000..74bb344035bb --- /dev/null +++ b/tools/lib/bpf/btf.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ + +#ifndef __BPF_BTF_H +#define __BPF_BTF_H + +#include <stdint.h> + +#define BTF_ELF_SEC ".BTF" + +struct btf; + +typedef int (*btf_print_fn_t)(const char *, ...) + __attribute__((format(printf, 1, 2))); + +void btf__free(struct btf *btf); +struct btf *btf__new(uint8_t *data, uint32_t size, btf_print_fn_t err_log); +int32_t btf__find_by_name(const struct btf *btf, const char *type_name); +int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id); +int btf__fd(const struct btf *btf); + +#endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5922443063f0..7bcdca13083a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -45,6 +45,7 @@ #include "libbpf.h" #include "bpf.h" +#include "btf.h" #ifndef EM_BPF #define EM_BPF 247 @@ -212,6 +213,8 @@ struct bpf_map { char *name; size_t offset; struct bpf_map_def def; + uint32_t btf_key_id; + uint32_t btf_value_id; void *priv; bpf_map_clear_priv_t clear_priv; }; @@ -256,6 +259,8 @@ struct bpf_object { */ struct list_head list; + struct btf *btf; + void *priv; bpf_object_clear_priv_t clear_priv; @@ -819,7 +824,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj) data->d_size); else if (strcmp(name, "maps") == 0) obj->efile.maps_shndx = idx; - else if (sh.sh_type == SHT_SYMTAB) { + else if (strcmp(name, BTF_ELF_SEC) == 0) { + obj->btf = btf__new(data->d_buf, data->d_size, + __pr_debug); + if (IS_ERR(obj->btf)) { + pr_warning("Error loading ELF section %s: %ld. 
Ignored and continue.\n",
+				   BTF_ELF_SEC, PTR_ERR(obj->btf));
+			obj->btf = NULL;
+		}
+	} else if (sh.sh_type == SHT_SYMTAB) {
 		if (obj->efile.symbols) {
 			pr_warning("bpf: multiple SYMTAB in %s\n",
 				   obj->path);
@@ -996,33 +1009,126 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 	return 0;
 }
 
+static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
+{
+	struct bpf_map_def *def = &map->def;
+	const size_t max_name = 256;
+	int64_t key_size, value_size;
+	int32_t key_id, value_id;
+	char name[max_name];
+
+	/* Find key type by name from BTF */
+	if (snprintf(name, max_name, "%s_key", map->name) >= max_name) {
+		pr_warning("map:%s length of BTF key_type:%s_key is too long\n",
+			   map->name, map->name);
+		return -EINVAL;
+	}
+
+	key_id = btf__find_by_name(btf, name);
+	if (key_id < 0) {
+		pr_debug("map:%s key_type:%s cannot be found in BTF\n",
+			 map->name, name);
+		return key_id;
+	}
+
+	key_size = btf__resolve_size(btf, key_id);
+	if (key_size < 0) {
+		pr_warning("map:%s key_type:%s cannot get the BTF type_size\n",
+			   map->name, name);
+		return key_size;
+	}
+
+	if (def->key_size != key_size) {
+		pr_warning("map:%s key_type:%s has BTF type_size:%ld != key_size:%u\n",
+			   map->name, name, key_size, def->key_size);
+		return -EINVAL;
+	}
+
+	/* Find value type from BTF */
+	if (snprintf(name, max_name, "%s_value", map->name) >= max_name) {
+		pr_warning("map:%s length of BTF value_type:%s_value is too long\n",
+			   map->name, map->name);
+		return -EINVAL;
+	}
+
+	value_id = btf__find_by_name(btf, name);
+	if (value_id < 0) {
+		pr_debug("map:%s value_type:%s cannot be found in BTF\n",
+			 map->name, name);
+		return value_id;
+	}
+
+	value_size = btf__resolve_size(btf, value_id);
+	if (value_size < 0) {
+		pr_warning("map:%s value_type:%s cannot get the BTF type_size\n",
+			   map->name, name);
+		return value_size;
+	}
+
+	if (def->value_size != value_size) {
+		pr_warning("map:%s value_type:%s has BTF type_size:%ld != value_size:%u\n",
+			   map->name, name, value_size, def->value_size);
+		return -EINVAL;
+	}
+
+	map->btf_key_id = key_id;
+	map->btf_value_id = value_id;
+
+	return 0;
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
+	struct bpf_create_map_attr create_attr = {};
 	unsigned int i;
+	int err;
 
 	for (i = 0; i < obj->nr_maps; i++) {
-		struct bpf_map_def *def = &obj->maps[i].def;
-		int *pfd = &obj->maps[i].fd;
-
-		*pfd = bpf_create_map_name(def->type,
-					   obj->maps[i].name,
-					   def->key_size,
-					   def->value_size,
-					   def->max_entries,
-					   def->map_flags);
+		struct bpf_map *map = &obj->maps[i];
+		struct bpf_map_def *def = &map->def;
+		int *pfd = &map->fd;
+
+		create_attr.name = map->name;
+		create_attr.map_type = def->type;
+		create_attr.map_flags = def->map_flags;
+		create_attr.key_size = def->key_size;
+		create_attr.value_size = def->value_size;
+		create_attr.max_entries = def->max_entries;
+		create_attr.btf_fd = 0;
+		create_attr.btf_key_id = 0;
+		create_attr.btf_value_id = 0;
+
+		if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
+			create_attr.btf_fd = btf__fd(obj->btf);
+			create_attr.btf_key_id = map->btf_key_id;
+			create_attr.btf_value_id = map->btf_value_id;
+		}
+
+		*pfd = bpf_create_map_xattr(&create_attr);
+		if (*pfd < 0 && create_attr.btf_key_id) {
+			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d).
Retrying without BTF.\n", + map->name, strerror(errno), errno); + create_attr.btf_fd = 0; + create_attr.btf_key_id = 0; + create_attr.btf_value_id = 0; + map->btf_key_id = 0; + map->btf_value_id = 0; + *pfd = bpf_create_map_xattr(&create_attr); + } + if (*pfd < 0) { size_t j; - int err = *pfd; + err = *pfd; pr_warning("failed to create map (name: '%s'): %s\n", - obj->maps[i].name, + map->name, strerror(errno)); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; } - pr_debug("create map %s: fd=%d\n", obj->maps[i].name, *pfd); + pr_debug("create map %s: fd=%d\n", map->name, *pfd); } return 0; @@ -1641,6 +1747,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_object__elf_finish(obj); bpf_object__unload(obj); + btf__free(obj->btf); for (i = 0; i < obj->nr_maps; i++) { zfree(&obj->maps[i].name); @@ -1692,6 +1799,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj) return obj ? obj->kern_version : 0; } +int bpf_object__btf_fd(const struct bpf_object *obj) +{ + return obj->btf ? btf__fd(obj->btf) : -1; +} + int bpf_object__set_priv(struct bpf_object *obj, void *priv, bpf_object_clear_priv_t clear_priv) { @@ -1845,11 +1957,12 @@ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); +BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); -static void bpf_program__set_expected_attach_type(struct bpf_program *prog, - enum bpf_attach_type type) +void bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type) { prog->expected_attach_type = type; } @@ -1859,6 +1972,9 @@ static void bpf_program__set_expected_attach_type(struct bpf_program *prog, #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0) +#define BPF_S_PROG_SEC(string, ptype) \ + BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype) + #define BPF_SA_PROG_SEC(string, ptype) \ BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype) @@ -1874,6 +1990,7 @@ static const struct { BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), @@ -1889,10 +2006,13 @@ static const struct { BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND), BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT), BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT), + BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND), + BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND), }; #undef BPF_PROG_SEC #undef BPF_PROG_SEC_FULL +#undef BPF_S_PROG_SEC #undef BPF_SA_PROG_SEC static int bpf_program__identify_section(struct bpf_program *prog) @@ -1929,6 +2049,16 @@ const char *bpf_map__name(struct bpf_map *map) return map ? map->name : NULL; } +uint32_t bpf_map__btf_key_id(const struct bpf_map *map) +{ + return map ? map->btf_key_id : 0; +} + +uint32_t bpf_map__btf_value_id(const struct bpf_map *map) +{ + return map ? 
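
The retry above makes BTF strictly optional: a map that fails to create with BTF attached is re-created without it, so objects built by a BTF-emitting toolchain still load on kernels that reject the new attributes. Callers of bpf_create_map_xattr() can follow the same hedged pattern directly (a sketch; btf_fd, key_id and value_id would come from btf__fd() and btf__find_by_name(), and struct ipv_counts is the value type from the earlier sketch):

struct bpf_create_map_attr attr = {
        .name         = "btf_map",
        .map_type     = BPF_MAP_TYPE_ARRAY,
        .key_size     = sizeof(int),
        .value_size   = sizeof(struct ipv_counts),
        .max_entries  = 4,
        .btf_fd       = btf_fd,         /* fd of the loaded BTF object */
        .btf_key_id   = key_id,
        .btf_value_id = value_id,
};
int map_fd = bpf_create_map_xattr(&attr);

if (map_fd < 0) {
        /* older kernel: drop the BTF attributes and retry, as libbpf does */
        attr.btf_fd = 0;
        attr.btf_key_id = 0;
        attr.btf_value_id = 0;
        map_fd = bpf_create_map_xattr(&attr);
}
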
map->btf_value_id : 0; +} + int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index a3a62a583f27..197f9ce2248c 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -78,6 +78,7 @@ int bpf_object__load(struct bpf_object *obj); int bpf_object__unload(struct bpf_object *obj); const char *bpf_object__name(struct bpf_object *obj); unsigned int bpf_object__kversion(struct bpf_object *obj); +int bpf_object__btf_fd(const struct bpf_object *obj); struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ @@ -185,15 +186,19 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n); */ int bpf_program__set_socket_filter(struct bpf_program *prog); int bpf_program__set_tracepoint(struct bpf_program *prog); +int bpf_program__set_raw_tracepoint(struct bpf_program *prog); int bpf_program__set_kprobe(struct bpf_program *prog); int bpf_program__set_sched_cls(struct bpf_program *prog); int bpf_program__set_sched_act(struct bpf_program *prog); int bpf_program__set_xdp(struct bpf_program *prog); int bpf_program__set_perf_event(struct bpf_program *prog); void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); +void bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type); bool bpf_program__is_socket_filter(struct bpf_program *prog); bool bpf_program__is_tracepoint(struct bpf_program *prog); +bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); bool bpf_program__is_kprobe(struct bpf_program *prog); bool bpf_program__is_sched_cls(struct bpf_program *prog); bool bpf_program__is_sched_act(struct bpf_program *prog); @@ -239,6 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj); int bpf_map__fd(struct bpf_map *map); const struct bpf_map_def *bpf_map__def(struct bpf_map *map); const char *bpf_map__name(struct bpf_map *map); +uint32_t bpf_map__btf_key_id(const struct bpf_map *map); +uint32_t bpf_map__btf_value_id(const struct bpf_map *map); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); int bpf_map__set_priv(struct bpf_map *map, void *priv, diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 5e1ab2f0eb79..3e3b3ced3f7c 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -15,3 +15,4 @@ test_libbpf_open test_sock test_sock_addr urandom_read +test_btf diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 0a315ddabbf4..b64a7a39cbc8 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -24,14 +24,15 @@ urandom_read: urandom_read.c # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ - test_sock test_sock_addr + test_sock test_btf test_sockmap TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ - sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o + sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ + 
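
With the section table above, a program's type and expected attach type are chosen from its ELF section name alone, and the now-exported bpf_program__set_expected_attach_type() lets callers override that after open. A raw tracepoint, for instance, needs nothing more than the new section prefix (a sketch; SEC() comes from bpf_helpers.h and struct bpf_raw_tracepoint_args from the current linux/bpf.h UAPI):

SEC("raw_tracepoint/sched_switch")
int trace_sched_switch(struct bpf_raw_tracepoint_args *ctx)
{
        /* ctx->args[] carries the raw, untranslated tracepoint arguments */
        return 0;
}
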
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -39,10 +40,11 @@ TEST_PROGS := test_kmod.sh \ test_xdp_redirect.sh \ test_xdp_meta.sh \ test_offload.py \ - test_sock_addr.sh + test_sock_addr.sh \ + test_tunnel.sh # Compile but not part of 'make run_tests' -TEST_GEN_PROGS_EXTENDED = test_libbpf_open +TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr include ../lib.mk @@ -55,6 +57,7 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c +$(OUTPUT)/test_sockmap: cgroup_helpers.c .PHONY: force @@ -66,6 +69,8 @@ $(BPFOBJ): force CLANG ?= clang LLC ?= llc +LLVM_OBJCOPY ?= llvm-objcopy +BTF_PAHOLE ?= pahole PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) @@ -83,9 +88,26 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline +BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help |& grep dwarfris) +BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help |& grep BTF) +BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --version |& grep LLVM) + +ifneq ($(BTF_LLC_PROBE),) +ifneq ($(BTF_PAHOLE_PROBE),) +ifneq ($(BTF_OBJCOPY_PROBE),) + CLANG_FLAGS += -g + LLC_FLAGS += -mattr=dwarfris + DWARF2BTF = y +endif +endif +endif + $(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ - $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index d8223d99f96d..69d7b918e66a 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -96,6 +96,11 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; +static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = + (void *) BPF_FUNC_xdp_adjust_tail; +static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, + int size, int flags) = + (void *) BPF_FUNC_skb_get_xfrm_state; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -129,6 +134,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) = + (void *) BPF_FUNC_csum_diff; static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = (void *) BPF_FUNC_skb_under_cgroup; static int (*bpf_skb_change_head)(void *, int len, int flags) = diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c new file mode 100644 index 000000000000..4cd5e860c903 --- /dev/null +++ b/tools/testing/selftests/bpf/test_adjust_tail.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 
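
The three probes above gate BTF generation on the whole toolchain being new enough: llc must accept -mattr=dwarfris (apparently so the emitted DWARF avoids relocations that pahole cannot resolve in BPF objects), pahole must know how to convert DWARF into a .BTF section (-J), and llvm-objcopy must be the LLVM one. Only when all three match does the build add -g, pass -mattr=dwarfris to llc, and run pahole -J over each object; on older toolchains the test objects simply lose their BTF rather than failing the build.
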
2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +SEC("xdp_adjust_tail") +int _xdp_adjust_tail(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + int offset = 0; + + if (data_end - data == 54) + offset = 256; + else + offset = 20; + if (bpf_xdp_adjust_tail(xdp, 0 - offset)) + return XDP_DROP; + return XDP_TX; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c new file mode 100644 index 000000000000..7b39b1f712a1 --- /dev/null +++ b/tools/testing/selftests/bpf/test_btf.c @@ -0,0 +1,1669 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ + +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/err.h> +#include <bpf/bpf.h> +#include <sys/resource.h> +#include <libelf.h> +#include <gelf.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> + +#include "bpf_rlimit.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define __printf(a, b) __attribute__((format(printf, a, b))) + +__printf(1, 2) +static int __base_pr(const char *format, ...) +{ + va_list args; + int err; + + va_start(args, format); + err = vfprintf(stderr, format, args); + va_end(args); + return err; +} + +#define BTF_INFO_ENC(kind, root, vlen) \ + ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) + +#define BTF_TYPE_ENC(name, info, size_or_type) \ + (name), (info), (size_or_type) + +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) + +#define BTF_ARRAY_ENC(type, index_type, nr_elems) \ + (type), (index_type), (nr_elems) +#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \ + BTF_ARRAY_ENC(type, index_type, nr_elems) + +#define BTF_MEMBER_ENC(name, type, bits_offset) \ + (name), (type), (bits_offset) +#define BTF_ENUM_ENC(name, val) (name), (val) + +#define BTF_TYPEDEF_ENC(name, type) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type) + +#define BTF_PTR_ENC(name, type) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) + +#define BTF_END_RAW 0xdeadbeef +#define NAME_TBD 0xdeadb33f + +#define MAX_NR_RAW_TYPES 1024 +#define BTF_LOG_BUF_SIZE 65535 + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +static struct args { + unsigned int raw_test_num; + unsigned int file_test_num; + unsigned int get_info_test_num; + bool raw_test; + bool file_test; + bool get_info_test; + bool pprint_test; + bool always_log; +} args; + +static char btf_log_buf[BTF_LOG_BUF_SIZE]; + +static struct btf_header hdr_tmpl = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, +}; + +struct btf_raw_test { + const char *descr; + const char *str_sec; + const char *map_name; + __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 str_sec_size; + enum bpf_map_type map_type; + __u32 key_size; + __u32 value_size; + __u32 key_id; + __u32 value_id; + __u32 max_entries; + bool btf_load_err; + bool map_create_err; + int type_off_delta; + int 
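
Each raw test below is just an array of __u32 words that these macros assemble into btf_type records. As a worked example of the encoding, here is what the first entry of most tests expands to (a sketch, derived from the macro definitions above):

/* BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4) emits four words:
 *
 *   name_off = 0                                  anonymous
 *   info     = BTF_INFO_ENC(BTF_KIND_INT, 0, 0)   kind in bits 24-30, vlen 0
 *   size     = 4                                  byte size of the int
 *   extra    = BTF_INT_ENC(BTF_INT_SIGNED, 0, 32) signed, bit offset 0, 32 bits
 *
 * i.e. the in-memory encoding of a plain signed 32-bit "int", which the
 * tests then reference as type id [1].
 */
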
str_off_delta; + int str_len_delta; +}; + +static struct btf_raw_test raw_tests[] = { +/* enum E { + * E0, + * E1, + * }; + * + * struct A { + * int m; + * unsigned long long n; + * char o; + * [3 bytes hole] + * int p[8]; + * int q[4][8]; + * enum E r; + * }; + */ +{ + .descr = "struct test #1", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* unsigned long long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + /* char */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */ + /* int[8] */ + BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ + /* struct A { */ /* [5] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* unsigned long long n;*/ + BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ + BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */ + BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r */ + /* } */ + /* int[4][8] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1", + .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test1_map", + .key_size = sizeof(int), + .value_size = 180, + .key_id = 1, + .value_id = 5, + .max_entries = 4, +}, + +/* typedef struct b Struct_B; + * + * struct A { + * int m; + * struct b n[4]; + * const Struct_B o[4]; + * }; + * + * struct B { + * int m; + * int n; + * }; + */ +{ + .descr = "struct test #2", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct b [4] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), + + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4] */ + BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/ + /* } */ + + /* struct B { */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ + BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */ + /* } */ + + /* const int */ /* [5] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), + /* typedef struct b Struct_B */ /* [6] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4), + /* const Struct_B */ /* [7] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6), + /* const Struct_B [4] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(7, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B", + .str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_test2_map", + .key_size = sizeof(int), + .value_size = 68, + .key_id = 1, + .value_id = 3, + .max_entries = 4, +}, + +/* Test member exceeds the size of struct. 
+ *
+ * struct A {
+ * int m;
+ * int n;
+ * };
+ */
+{
+ .descr = "size check test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check1_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* Test member exceeds the size of struct
+ *
+ * struct A {
+ * int m;
+ * int n[2];
+ * };
+ */
+{
+ .descr = "size check test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* int[2] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check2_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_id = 1,
+ .value_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+
+},
+
+/* Test member exceeds the size of struct
+ *
+ * struct A {
+ * int m;
+ * void *n;
+ * };
+ */
+{
+ .descr = "size check test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* void* */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check3_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_id = 1,
+ .value_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* Test member exceeds the size of struct
+ *
+ * enum E {
+ * E0,
+ * E1,
+ * };
+ *
+ * struct A {
+ * int m;
+ * enum E n;
+ * };
+ */
+{
+ .descr = "size check test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* enum E { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ /* } */
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0E\0E0\0E1\0A\0m\0n",
+ .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check4_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_id = 1,
+ .value_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* typedef const void * const_void_ptr;
+ * struct A {
+ * const_void_ptr m;
+ * };
+ */
+{
+ .descr = "void test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /*
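
The four size-check variants all probe the same invariant from the kernel side: no member may extend past the struct's declared byte size. In plain C terms, test #1 encodes (a sketch):

/* declared size is sizeof(int) * 2 - 1 == 7 bytes ... */
struct A {
        int m;          /* occupies bytes 0..3 */
        int n;          /* needs bytes 4..7, one past the declared size */
};
/* ... so bpf_load_btf() must fail, hence .btf_load_err = true */
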
const void* */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), + /* typedef const void * const_void_ptr */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), + /* struct A { */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const_void_ptr m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 0), + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0const_void_ptr\0A\0m", + .str_sec_size = sizeof("\0const_void_ptr\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test1_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_id = 1, + .value_id = 4, + .max_entries = 4, +}, + +/* struct A { + * const void m; + * }; + */ +{ + .descr = "void test #2", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* struct A { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8), + /* const void m; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0A\0m", + .str_sec_size = sizeof("\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test2_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_id = 1, + .value_id = 3, + .max_entries = 4, + .btf_load_err = true, +}, + +/* typedef const void * const_void_ptr; + * const_void_ptr[4] + */ +{ + .descr = "void test #3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* const void* */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), + /* typedef const void * const_void_ptr */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), + /* const_void_ptr[4] */ /* [4] */ + BTF_TYPE_ARRAY_ENC(3, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0const_void_ptr", + .str_sec_size = sizeof("\0const_void_ptr"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test3_map", + .key_size = sizeof(int), + .value_size = sizeof(void *) * 4, + .key_id = 1, + .value_id = 4, + .max_entries = 4, +}, + +/* const void[4] */ +{ + .descr = "void test #4", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + /* const void[4] */ /* [3] */ + BTF_TYPE_ARRAY_ENC(2, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m", + .str_sec_size = sizeof("\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "void_test4_map", + .key_size = sizeof(int), + .value_size = sizeof(void *) * 4, + .key_id = 1, + .value_id = 3, + .max_entries = 4, + .btf_load_err = true, +}, + +/* Array_A <------------------+ + * elem_type == Array_B | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array A --+ + */ +{ + .descr = "loop test #1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* Array_A */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), + /* Array_B */ /* [3] */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test1_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_id = 1, + .value_id = 2, + .max_entries = 4, + .btf_load_err = true, +}, + +/* typedef is _before_ the BTF type of Array_A and Array_B + * + * typedef Array_B int_array; + * + * Array_A <------------------+ + * elem_type == 
int_array | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array_A --+ + */ +{ + .descr = "loop test #2", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* typedef Array_B int_array */ + BTF_TYPEDEF_ENC(1, 4), /* [2] */ + /* Array_A */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */ + /* Array_B */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */ + + BTF_END_RAW, + }, + .str_sec = "\0int_array\0", + .str_sec_size = sizeof("\0int_array"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test2_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_id = 1, + .value_id = 2, + .max_entries = 4, + .btf_load_err = true, +}, + +/* Array_A <------------------+ + * elem_type == Array_B | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array_A --+ + */ +{ + .descr = "loop test #3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* Array_A */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), + /* Array_B */ /* [3] */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), + + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test3_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_id = 1, + .value_id = 2, + .max_entries = 4, + .btf_load_err = true, +}, + +/* typedef is _between_ the BTF type of Array_A and Array_B + * + * typedef Array_B int_array; + * + * Array_A <------------------+ + * elem_type == int_array | + * | | + * | | + * Array_B <-------- + | + * elem_type == Array_A --+ + */ +{ + .descr = "loop test #4", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* Array_A */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 8), + /* typedef Array_B int_array */ /* [3] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* Array_B */ /* [4] */ + BTF_TYPE_ARRAY_ENC(2, 1, 8), + BTF_END_RAW, + }, + .str_sec = "\0int_array\0", + .str_sec_size = sizeof("\0int_array"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test4_map", + .key_size = sizeof(int), + .value_size = sizeof(sizeof(int) * 8), + .key_id = 1, + .value_id = 2, + .max_entries = 4, + .btf_load_err = true, +}, + +/* typedef struct B Struct_B + * + * struct A { + * int x; + * Struct_B y; + * }; + * + * struct B { + * int x; + * struct A y; + * }; + */ +{ + .descr = "loop test #5", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct A */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y; */ + /* typedef struct B Struct_B */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + /* struct B */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y; */ + BTF_END_RAW, + }, + .str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y", + .str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test5_map", + .key_size = sizeof(int), + .value_size = 8, + .key_id = 1, + .value_id = 2, + .max_entries = 4, + .btf_load_err = true, +}, + +/* struct A { + * int x; + * struct A array_a[4]; + * }; + */ +{ + .descr = "loop test #6", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */ + /* struct A */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), 
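
The loop tests feed the loader type graphs that contain reference cycles, which the kernel's BTF validation must detect rather than recurse on forever. Loop test #5, for instance, corresponds to C that no compiler could lay out (a sketch):

typedef struct B Struct_B;

struct A {
        int x;
        Struct_B y;     /* A embeds B by value ... */
};

struct B {
        int x;
        struct A y;     /* ... and B embeds A: no finite size satisfies this */
};
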
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */ + BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4]; */ + BTF_END_RAW, + }, + .str_sec = "\0A\0x\0y", + .str_sec_size = sizeof("\0A\0x\0y"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test6_map", + .key_size = sizeof(int), + .value_size = 8, + .key_id = 1, + .value_id = 2, + .max_entries = 4, + .btf_load_err = true, +}, + +{ + .descr = "loop test #7", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const void *m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 0), + /* CONST type_id=3 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), + /* PTR type_id=2 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), + BTF_END_RAW, + }, + .str_sec = "\0A\0m", + .str_sec_size = sizeof("\0A\0m"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test7_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_id = 1, + .value_id = 2, + .max_entries = 4, + .btf_load_err = true, +}, + +{ + .descr = "loop test #8", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* struct A { */ /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const void *m; */ + BTF_MEMBER_ENC(NAME_TBD, 4, 0), + /* struct B { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), + /* const void *n; */ + BTF_MEMBER_ENC(NAME_TBD, 6, 0), + /* CONST type_id=5 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5), + /* PTR type_id=6 */ /* [5] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6), + /* CONST type_id=7 */ /* [6] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7), + /* PTR type_id=4 */ /* [7] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4), + BTF_END_RAW, + }, + .str_sec = "\0A\0m\0B\0n", + .str_sec_size = sizeof("\0A\0m\0B\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "loop_test8_map", + .key_size = sizeof(int), + .value_size = sizeof(void *), + .key_id = 1, + .value_id = 2, + .max_entries = 4, + .btf_load_err = true, +}, + +{ + .descr = "type_off == str_off", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_id = 1, + .value_id = 1, + .max_entries = 4, + .btf_load_err = true, + .type_off_delta = sizeof(struct btf_type) + sizeof(int) + sizeof("\0int"), +}, + +{ + .descr = "Unaligned type_off", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_id = 1, + .value_id = 1, + .max_entries = 4, + .btf_load_err = true, + .type_off_delta = 1, +}, + +{ + .descr = "str_off beyonds btf size", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_id = 1, + .value_id = 1, + .max_entries = 4, + 
.btf_load_err = true, + .str_off_delta = sizeof("\0int") + 1, +}, + +{ + .descr = "str_len beyonds btf size", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_id = 1, + .value_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_len_delta = 1, +}, + +{ + .descr = "String section does not end with null", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_id = 1, + .value_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_len_delta = -1, +}, + +{ + .descr = "Empty string section", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_id = 1, + .value_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_len_delta = 0 - (int)sizeof("\0int"), +}, + +}; /* struct btf_raw_test raw_tests[] */ + +static const char *get_next_str(const char *start, const char *end) +{ + return start < end - 1 ? start + 1 : NULL; +} + +static int get_type_sec_size(const __u32 *raw_types) +{ + int i; + + for (i = MAX_NR_RAW_TYPES - 1; + i >= 0 && raw_types[i] != BTF_END_RAW; + i--) + ; + + return i < 0 ? i : i * sizeof(raw_types[0]); +} + +static void *btf_raw_create(const struct btf_header *hdr, + const __u32 *raw_types, + const char *str, + unsigned int str_sec_size, + unsigned int *btf_size) +{ + const char *next_str = str, *end_str = str + str_sec_size; + unsigned int size_needed, offset; + struct btf_header *ret_hdr; + int i, type_sec_size; + uint32_t *ret_types; + void *raw_btf; + + type_sec_size = get_type_sec_size(raw_types); + if (type_sec_size < 0) { + fprintf(stderr, "Cannot get nr_raw_types\n"); + return NULL; + } + + size_needed = sizeof(*hdr) + type_sec_size + str_sec_size; + raw_btf = malloc(size_needed); + if (!raw_btf) { + fprintf(stderr, "Cannot allocate memory for raw_btf\n"); + return NULL; + } + + /* Copy header */ + memcpy(raw_btf, hdr, sizeof(*hdr)); + offset = sizeof(*hdr); + + /* Copy type section */ + ret_types = raw_btf + offset; + for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) { + if (raw_types[i] == NAME_TBD) { + next_str = get_next_str(next_str, end_str); + if (!next_str) { + fprintf(stderr, "Error in getting next_str\n"); + free(raw_btf); + return NULL; + } + ret_types[i] = next_str - str; + next_str += strlen(next_str); + } else { + ret_types[i] = raw_types[i]; + } + } + offset += type_sec_size; + + /* Copy string section */ + memcpy(raw_btf + offset, str, str_sec_size); + + ret_hdr = (struct btf_header *)raw_btf; + ret_hdr->str_off = type_sec_size; + ret_hdr->str_len = str_sec_size; + + *btf_size = size_needed; + + return raw_btf; +} + +static int do_test_raw(unsigned int test_num) +{ + struct btf_raw_test *test = &raw_tests[test_num - 1]; + struct bpf_create_map_attr create_attr = {}; + int map_fd = -1, btf_fd = -1; + unsigned int raw_btf_size; + struct btf_header *hdr; + void *raw_btf; + int err; + + fprintf(stderr, "BTF raw 
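
btf_raw_create() glues the pieces into the image layout the kernel expects, patching each NAME_TBD placeholder with the offset of the next string as it copies the type words. Roughly (a sketch; the section offsets are apparently relative to the end of the header, with type_off left at 0 by the zero-initialized hdr_tmpl):

/*  +-----------------------+  0
 *  | struct btf_header     |  magic/version from hdr_tmpl
 *  +-----------------------+  sizeof(hdr)
 *  | type section (__u32s) |  raw_types[] with NAME_TBD patched
 *  +-----------------------+  sizeof(hdr) + type_sec_size
 *  | string section        |  str_off = type_sec_size, str_len = str_sec_size
 *  +-----------------------+
 */
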
test[%u] (%s): ", test_num, test->descr); + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + hdr = raw_btf; + + hdr->type_off = (int)hdr->type_off + test->type_off_delta; + hdr->str_off = (int)hdr->str_off + test->str_off_delta; + hdr->str_len = (int)hdr->str_len + test->str_len_delta; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + free(raw_btf); + + err = ((btf_fd == -1) != test->btf_load_err); + if (err) + fprintf(stderr, "btf_load_err:%d btf_fd:%d\n", + test->btf_load_err, btf_fd); + + if (err || btf_fd == -1) + goto done; + + create_attr.name = test->map_name; + create_attr.map_type = test->map_type; + create_attr.key_size = test->key_size; + create_attr.value_size = test->value_size; + create_attr.max_entries = test->max_entries; + create_attr.btf_fd = btf_fd; + create_attr.btf_key_id = test->key_id; + create_attr.btf_value_id = test->value_id; + + map_fd = bpf_create_map_xattr(&create_attr); + + err = ((map_fd == -1) != test->map_create_err); + if (err) + fprintf(stderr, "map_create_err:%d map_fd:%d\n", + test->map_create_err, map_fd); + +done: + if (!err) + fprintf(stderr, "OK\n"); + + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "%s\n", btf_log_buf); + + if (btf_fd != -1) + close(btf_fd); + if (map_fd != -1) + close(map_fd); + + return err; +} + +static int test_raw(void) +{ + unsigned int i; + int err = 0; + + if (args.raw_test_num) + return do_test_raw(args.raw_test_num); + + for (i = 1; i <= ARRAY_SIZE(raw_tests); i++) + err |= do_test_raw(i); + + return err; +} + +struct btf_get_info_test { + const char *descr; + const char *str_sec; + __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 str_sec_size; + int info_size_delta; +}; + +const struct btf_get_info_test get_info_tests[] = { +{ + .descr = "== raw_btf_size+1", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .info_size_delta = 1, +}, +{ + .descr = "== raw_btf_size-3", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .info_size_delta = -3, +}, +}; + +static int do_test_get_info(unsigned int test_num) +{ + const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; + unsigned int raw_btf_size, user_btf_size, expected_nbytes; + uint8_t *raw_btf = NULL, *user_btf = NULL; + int btf_fd = -1, err; + + fprintf(stderr, "BTF GET_INFO_BY_ID test[%u] (%s): ", + test_num, test->descr); + + raw_btf = btf_raw_create(&hdr_tmpl, + test->raw_types, + test->str_sec, + test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + + user_btf = malloc(raw_btf_size); + if (!user_btf) { + fprintf(stderr, "Cannot allocate memory for user_btf\n"); + err = -1; + goto done; + } + + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + if (btf_fd == -1) { + fprintf(stderr, "bpf_load_btf:%s(%d)\n", + strerror(errno), errno); + err = -1; + goto done; + } + + user_btf_size = (int)raw_btf_size + test->info_size_delta; + expected_nbytes = min(raw_btf_size, user_btf_size); + if (raw_btf_size > expected_nbytes) + memset(user_btf + expected_nbytes, 0xff, + raw_btf_size - expected_nbytes); + + err = bpf_obj_get_info_by_fd(btf_fd, user_btf, &user_btf_size); + if (err || 
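
The GET_INFO tests poison the tail of the user buffer with 0xff and then ask for one byte more, or three bytes less, than the real image. The contract being checked: the kernel copies at most the caller-supplied length, reports the true size back through the in/out length, and never writes past the bytes it was given, which is why the loop after the memcmp insists every byte beyond expected_nbytes is still 0xff. In sketch form (user_buf and user_buf_size are hypothetical names):

__u32 len = user_buf_size;              /* in: caller's buffer size */
err = bpf_obj_get_info_by_fd(btf_fd, user_buf, &len);
/* out: len holds the true image size; at most the original len bytes
 * were copied, so comparing len against the known size detects truncation */
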
user_btf_size != raw_btf_size || + memcmp(raw_btf, user_btf, expected_nbytes)) { + fprintf(stderr, + "err:%d(errno:%d) raw_btf_size:%u user_btf_size:%u expected_nbytes:%u memcmp:%d\n", + err, errno, + raw_btf_size, user_btf_size, expected_nbytes, + memcmp(raw_btf, user_btf, expected_nbytes)); + err = -1; + goto done; + } + + while (expected_nbytes < raw_btf_size) { + fprintf(stderr, "%u...", expected_nbytes); + if (user_btf[expected_nbytes++] != 0xff) { + fprintf(stderr, "!= 0xff\n"); + err = -1; + goto done; + } + } + + fprintf(stderr, "OK\n"); + +done: + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "%s\n", btf_log_buf); + + free(raw_btf); + free(user_btf); + + if (btf_fd != -1) + close(btf_fd); + + return err; +} + +static int test_get_info(void) +{ + unsigned int i; + int err = 0; + + if (args.get_info_test_num) + return do_test_get_info(args.get_info_test_num); + + for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++) + err |= do_test_get_info(i); + + return err; +} + +struct btf_file_test { + const char *file; + bool btf_kv_notfound; +}; + +static struct btf_file_test file_tests[] = { +{ + .file = "test_btf_haskv.o", +}, +{ + .file = "test_btf_nokv.o", + .btf_kv_notfound = true, +}, +}; + +static int file_has_btf_elf(const char *fn) +{ + Elf_Scn *scn = NULL; + GElf_Ehdr ehdr; + int elf_fd; + Elf *elf; + int ret; + + if (elf_version(EV_CURRENT) == EV_NONE) { + fprintf(stderr, "Failed to init libelf\n"); + return -1; + } + + elf_fd = open(fn, O_RDONLY); + if (elf_fd == -1) { + fprintf(stderr, "Cannot open file %s: %s(%d)\n", + fn, strerror(errno), errno); + return -1; + } + + elf = elf_begin(elf_fd, ELF_C_READ, NULL); + if (!elf) { + fprintf(stderr, "Failed to read ELF from %s. %s\n", fn, + elf_errmsg(elf_errno())); + ret = -1; + goto done; + } + + if (!gelf_getehdr(elf, &ehdr)) { + fprintf(stderr, "Failed to get EHDR from %s\n", fn); + ret = -1; + goto done; + } + + while ((scn = elf_nextscn(elf, scn))) { + const char *sh_name; + GElf_Shdr sh; + + if (gelf_getshdr(scn, &sh) != &sh) { + fprintf(stderr, + "Failed to get section header from %s\n", fn); + ret = -1; + goto done; + } + + sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); + if (!strcmp(sh_name, BTF_ELF_SEC)) { + ret = 1; + goto done; + } + } + + ret = 0; + +done: + close(elf_fd); + elf_end(elf); + return ret; +} + +static int do_test_file(unsigned int test_num) +{ + const struct btf_file_test *test = &file_tests[test_num - 1]; + struct bpf_object *obj = NULL; + struct bpf_program *prog; + struct bpf_map *map; + int err; + + fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, + test->file); + + err = file_has_btf_elf(test->file); + if (err == -1) + return err; + + if (err == 0) { + fprintf(stderr, "SKIP. 
No ELF %s found\n", BTF_ELF_SEC); + return 0; + } + + obj = bpf_object__open(test->file); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = bpf_object__btf_fd(obj); + if (err == -1) { + fprintf(stderr, "bpf_object__btf_fd: -1\n"); + goto done; + } + + prog = bpf_program__next(NULL, obj); + if (!prog) { + fprintf(stderr, "Cannot find bpf_prog\n"); + err = -1; + goto done; + } + + bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); + err = bpf_object__load(obj); + if (err < 0) { + fprintf(stderr, "bpf_object__load: %d\n", err); + goto done; + } + + map = bpf_object__find_map_by_name(obj, "btf_map"); + if (!map) { + fprintf(stderr, "btf_map not found\n"); + err = -1; + goto done; + } + + err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0) + != test->btf_kv_notfound; + if (err) { + fprintf(stderr, + "btf_kv_notfound:%u btf_key_id:%u btf_value_id:%u\n", + test->btf_kv_notfound, + bpf_map__btf_key_id(map), + bpf_map__btf_value_id(map)); + goto done; + } + + fprintf(stderr, "OK\n"); + +done: + bpf_object__close(obj); + return err; +} + +static int test_file(void) +{ + unsigned int i; + int err = 0; + + if (args.file_test_num) + return do_test_file(args.file_test_num); + + for (i = 1; i <= ARRAY_SIZE(file_tests); i++) + err |= do_test_file(i); + + return err; +} + +const char *pprint_enum_str[] = { + "ENUM_ZERO", + "ENUM_ONE", + "ENUM_TWO", + "ENUM_THREE", +}; + +struct pprint_mapv { + uint32_t ui32; + uint16_t ui16; + /* 2 bytes hole */ + int32_t si32; + uint32_t unused_bits2a:2, + bits28:28, + unused_bits2b:2; + union { + uint64_t ui64; + uint8_t ui8a[8]; + }; + enum { + ENUM_ZERO, + ENUM_ONE, + ENUM_TWO, + ENUM_THREE, + } aenum; +}; + +static struct btf_raw_test pprint_test = { + .descr = "BTF pretty print test #1", + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + /* 2 bits */ /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 2, 2), + /* 28 bits */ /* [7] */ + BTF_TYPE_INT_ENC(0, 0, 0, 28, 4), + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 3, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 28), + BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 6, 96), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, 98), /* bits28 */ + 
BTF_MEMBER_ENC(NAME_TBD, 6, 126), /* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, 128), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */ + BTF_END_RAW, + }, + .str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum", + .str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "pprint_test", + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_id = 3, /* unsigned int */ + .value_id = 16, /* struct pprint_mapv */ + .max_entries = 128 * 1024, +}; + +static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i) +{ + v->ui32 = i; + v->si32 = -i; + v->unused_bits2a = 3; + v->bits28 = i; + v->unused_bits2b = 3; + v->ui64 = i; + v->aenum = i & 0x03; +} + +static int test_pprint(void) +{ + const struct btf_raw_test *test = &pprint_test; + struct bpf_create_map_attr create_attr = {}; + int map_fd = -1, btf_fd = -1; + struct pprint_mapv mapv = {}; + unsigned int raw_btf_size; + char expected_line[255]; + FILE *pin_file = NULL; + char pin_path[255]; + size_t line_len = 0; + char *line = NULL; + unsigned int key; + uint8_t *raw_btf; + ssize_t nread; + int err; + + fprintf(stderr, "%s......", test->descr); + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, + test->str_sec, test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + free(raw_btf); + + if (btf_fd == -1) { + err = -1; + fprintf(stderr, "bpf_load_btf: %s(%d)\n", + strerror(errno), errno); + goto done; + } + + create_attr.name = test->map_name; + create_attr.map_type = test->map_type; + create_attr.key_size = test->key_size; + create_attr.value_size = test->value_size; + create_attr.max_entries = test->max_entries; + create_attr.btf_fd = btf_fd; + create_attr.btf_key_id = test->key_id; + create_attr.btf_value_id = test->value_id; + + map_fd = bpf_create_map_xattr(&create_attr); + if (map_fd == -1) { + err = -1; + fprintf(stderr, "bpf_creat_map_btf: %s(%d)\n", + strerror(errno), errno); + goto done; + } + + if (snprintf(pin_path, sizeof(pin_path), "%s/%s", + "/sys/fs/bpf", test->map_name) == sizeof(pin_path)) { + err = -1; + fprintf(stderr, "pin_path is too long\n"); + goto done; + } + + err = bpf_obj_pin(map_fd, pin_path); + if (err) { + fprintf(stderr, "Cannot pin to %s. 
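
Pinning a BTF-typed map and reading it back through bpffs is the point of this test: the kernel pretty-prints each element using the value's BTF, and the test regenerates the same text with snprintf() below. Plugging key 0 into set_pprint_mapv() and the format string, the first non-comment line of the pinned file should work out to:

0: {0,0,0,0x3,0x0,0x3,{0|[0,0,0,0,0,0,0,0]},ENUM_ZERO}
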
%s(%d).\n", pin_path, + strerror(errno), errno); + goto done; + } + + for (key = 0; key < test->max_entries; key++) { + set_pprint_mapv(&mapv, key); + bpf_map_update_elem(map_fd, &key, &mapv, 0); + } + + pin_file = fopen(pin_path, "r"); + if (!pin_file) { + err = -1; + fprintf(stderr, "fopen(%s): %s(%d)\n", pin_path, + strerror(errno), errno); + goto done; + } + + /* Skip lines start with '#' */ + while ((nread = getline(&line, &line_len, pin_file)) > 0 && + *line == '#') + ; + + if (nread <= 0) { + err = -1; + fprintf(stderr, "Unexpected EOF\n"); + goto done; + } + + key = 0; + do { + ssize_t nexpected_line; + + set_pprint_mapv(&mapv, key); + nexpected_line = snprintf(expected_line, sizeof(expected_line), + "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n", + key, + mapv.ui32, mapv.si32, + mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b, + mapv.ui64, + mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3], + mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7], + pprint_enum_str[mapv.aenum]); + + if (nexpected_line == sizeof(expected_line)) { + err = -1; + fprintf(stderr, "expected_line is too long\n"); + goto done; + } + + if (strcmp(expected_line, line)) { + err = -1; + fprintf(stderr, "unexpected pprint output\n"); + fprintf(stderr, "expected: %s", expected_line); + fprintf(stderr, " read: %s", line); + goto done; + } + + nread = getline(&line, &line_len, pin_file); + } while (++key < test->max_entries && nread > 0); + + if (key < test->max_entries) { + err = -1; + fprintf(stderr, "Unexpected EOF\n"); + goto done; + } + + if (nread > 0) { + err = -1; + fprintf(stderr, "Unexpected extra pprint output: %s\n", line); + goto done; + } + + err = 0; + +done: + if (!err) + fprintf(stderr, "OK\n"); + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "%s\n", btf_log_buf); + if (btf_fd != -1) + close(btf_fd); + if (map_fd != -1) + close(map_fd); + if (pin_file) + fclose(pin_file); + unlink(pin_path); + free(line); + + return err; +} + +static void usage(const char *cmd) +{ + fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n", + cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), + ARRAY_SIZE(file_tests)); +} + +static int parse_args(int argc, char **argv) +{ + const char *optstr = "lpf:r:g:"; + int opt; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'l': + args.always_log = true; + break; + case 'f': + args.file_test_num = atoi(optarg); + args.file_test = true; + break; + case 'r': + args.raw_test_num = atoi(optarg); + args.raw_test = true; + break; + case 'g': + args.get_info_test_num = atoi(optarg); + args.get_info_test = true; + break; + case 'p': + args.pprint_test = true; + break; + case 'h': + usage(argv[0]); + exit(0); + default: + usage(argv[0]); + return -1; + } + } + + if (args.raw_test_num && + (args.raw_test_num < 1 || + args.raw_test_num > ARRAY_SIZE(raw_tests))) { + fprintf(stderr, "BTF raw test number must be [1 - %zu]\n", + ARRAY_SIZE(raw_tests)); + return -1; + } + + if (args.file_test_num && + (args.file_test_num < 1 || + args.file_test_num > ARRAY_SIZE(file_tests))) { + fprintf(stderr, "BTF file test number must be [1 - %zu]\n", + ARRAY_SIZE(file_tests)); + return -1; + } + + if (args.get_info_test_num && + (args.get_info_test_num < 1 || + args.get_info_test_num > ARRAY_SIZE(get_info_tests))) { + fprintf(stderr, "BTF get info test number must be [1 - %zu]\n", + ARRAY_SIZE(get_info_tests)); + return -1; + } + + return 0; +} + +int 
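
One small wrinkle in parse_args() below: 'h' never appears in optstr, so getopt() reports -h as an unknown option and control falls into the default branch (usage plus a -1 return) rather than the case 'h' handler with its zero exit. If the zero-exit help path is intended, the option string presumably needs the extra letter (a sketch):

const char *optstr = "hlpf:r:g:";       /* include 'h' so getopt() can return it */
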
main(int argc, char **argv) +{ + int err = 0; + + err = parse_args(argc, argv); + if (err) + return err; + + if (args.always_log) + libbpf_set_print(__base_pr, __base_pr, __base_pr); + + if (args.raw_test) + err |= test_raw(); + + if (args.get_info_test) + err |= test_get_info(); + + if (args.file_test) + err |= test_file(); + + if (args.pprint_test) + err |= test_pprint(); + + if (args.raw_test || args.get_info_test || args.file_test || + args.pprint_test) + return err; + + err |= test_raw(); + err |= test_get_info(); + err |= test_file(); + + return err; +} diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c new file mode 100644 index 000000000000..8c7ca096ecf2 --- /dev/null +++ b/tools/testing/selftests/bpf/test_btf_haskv.c @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct ipv_counts { + unsigned int v4; + unsigned int v6; +}; + +typedef int btf_map_key; +typedef struct ipv_counts btf_map_value; +btf_map_key dumm_key; +btf_map_value dummy_value; + +struct bpf_map_def SEC("maps") btf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct ipv_counts), + .max_entries = 4, +}; + +struct dummy_tracepoint_args { + unsigned long long pad; + struct sock *sock; +}; + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + struct ipv_counts *counts; + int key = 0; + + if (!arg->sock) + return 0; + + counts = bpf_map_lookup_elem(&btf_map, &key); + if (!counts) + return 0; + + counts->v6++; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c new file mode 100644 index 000000000000..0ed8e088eebf --- /dev/null +++ b/tools/testing/selftests/bpf/test_btf_nokv.c @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct ipv_counts { + unsigned int v4; + unsigned int v6; +}; + +struct bpf_map_def SEC("maps") btf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct ipv_counts), + .max_entries = 4, +}; + +struct dummy_tracepoint_args { + unsigned long long pad; + struct sock *sock; +}; + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + struct ipv_counts *counts; + int key = 0; + + if (!arg->sock) + return 0; + + counts = bpf_map_lookup_elem(&btf_map, &key); + if (!counts) + return 0; + + counts->v6++; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 4123d0ab90ba..fac581f1c57f 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -166,6 +166,37 @@ out: bpf_object__close(obj); } +static void test_xdp_adjust_tail(void) +{ + const char *file = "./test_adjust_tail.o"; + struct bpf_object *obj; + char buf[128]; + __u32 duration, retval, size; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + + CHECK(err || errno || retval != XDP_DROP, + "ipv4", "err %d errno %d retval %d size %d\n", + err, errno, 
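
The two bpf_prog_test_run() calls above exercise both branches of test_adjust_tail.c: pkt_v4 here is an exactly 54-byte Ethernet/IPv4/TCP frame, so the program asks to trim 256 bytes, bpf_xdp_adjust_tail() fails, and the verdict is XDP_DROP; pkt_v6 is larger, so trimming 20 bytes succeeds and the test expects XDP_TX with a 54-byte result, i.e. a 74-byte input. In arithmetic form (a sketch):

/* ipv4: 54 - 256 -> invalid shrink, helper errors, verdict XDP_DROP
 * ipv6: 74 -  20 -> 54 bytes left,  helper succeeds, verdict XDP_TX
 */
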
retval, size); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != XDP_TX || size != 54, + "ipv6", "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + bpf_object__close(obj); +} + + + #define MAGIC_VAL 0x1234 #define NUM_ITER 100000 #define VIP_NUM 5 @@ -1177,6 +1208,7 @@ int main(void) { test_pkt_access(); test_xdp(); + test_xdp_adjust_tail(); test_l4lb_all(); test_xdp_noinline(); test_tcp_estats(); diff --git a/samples/sockmap/sockmap_user.c b/tools/testing/selftests/bpf/test_sockmap.c index 6f2334912283..29c022d23f4e 100644 --- a/samples/sockmap/sockmap_user.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1,14 +1,5 @@ -/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io #include <stdio.h> #include <stdlib.h> #include <sys/socket.h> @@ -25,6 +16,7 @@ #include <fcntl.h> #include <sys/wait.h> #include <time.h> +#include <sched.h> #include <sys/time.h> #include <sys/resource.h> @@ -41,19 +33,31 @@ #include <getopt.h> -#include "../bpf/bpf_load.h" -#include "../bpf/bpf_util.h" -#include "../bpf/libbpf.h" +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_util.h" +#include "bpf_rlimit.h" +#include "cgroup_helpers.h" int running; -void running_handler(int a); +static void running_handler(int a); /* randomly selected ports for testing on lo */ #define S1_PORT 10000 #define S2_PORT 10001 +#define BPF_FILENAME "test_sockmap_kern.o" +#define CG_PATH "/sockmap" + /* global sockets */ int s1, s2, c1, c2, p1, p2; +int test_cnt; +int passed; +int failed; +int map_fd[8]; +struct bpf_map *maps[8]; +int prog_fd[11]; int txmsg_pass; int txmsg_noisy; @@ -107,7 +111,7 @@ static void usage(char *argv[]) printf("\n"); } -static int sockmap_init_sockets(void) +static int sockmap_init_sockets(int verbose) { int i, err, one = 1; struct sockaddr_in addr; @@ -207,9 +211,11 @@ static int sockmap_init_sockets(void) return errno; } - printf("connected sockets: c1 <-> p1, c2 <-> p2\n"); - printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n", - c1, s1, c2, s2); + if (verbose) { + printf("connected sockets: c1 <-> p1, c2 <-> p2\n"); + printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n", + c1, s1, c2, s2); + } return 0; } @@ -226,6 +232,9 @@ struct sockmap_options { bool sendpage; bool data_test; bool drop_expected; + int iov_count; + int iov_length; + int rate; }; static int msg_loop_sendpage(int fd, int iov_length, int cnt, @@ -324,17 +333,19 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->end); } else { int slct, recv, max_fd = fd; + int fd_flags = O_NONBLOCK; struct timeval timeout; float total_bytes; fd_set w; + fcntl(fd, fd_flags); total_bytes = (float)iov_count * (float)iov_length * (float)cnt; err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) perror("recv start time: "); while (s->bytes_recvd < total_bytes) { - timeout.tv_sec = 1; - 
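
The receive loop now aims to go non-blocking with short 10-microsecond select() timeouts instead of 1-second ones. Note that fcntl(fd, fd_flags) as written passes O_NONBLOCK as the fcntl command rather than as a flag; the conventional idiom for what this appears to intend is (a sketch):

int flags = fcntl(fd, F_GETFL, 0);

if (flags >= 0)
        fcntl(fd, F_SETFL, flags | O_NONBLOCK);
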
timeout.tv_usec = 0; + timeout.tv_sec = 0; + timeout.tv_usec = 10; /* FD sets */ FD_ZERO(&w); @@ -346,7 +357,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->end); goto out_errno; } else if (!slct) { - fprintf(stderr, "unexpected timeout\n"); + if (opt->verbose) + fprintf(stderr, "unexpected timeout\n"); errno = -EIO; clock_gettime(CLOCK_MONOTONIC, &s->end); goto out_errno; @@ -409,12 +421,14 @@ static inline float recvdBps(struct msg_stats s) return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec); } -static int sendmsg_test(int iov_count, int iov_buf, int cnt, - struct sockmap_options *opt) +static int sendmsg_test(struct sockmap_options *opt) { float sent_Bps = 0, recvd_Bps = 0; int rx_fd, txpid, rxpid, err = 0; struct msg_stats s = {0}; + int iov_count = opt->iov_count; + int iov_buf = opt->iov_length; + int cnt = opt->rate; int status; errno = 0; @@ -433,7 +447,7 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt, iov_count = 1; err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false, opt); - if (err) + if (err && opt->verbose) fprintf(stderr, "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); @@ -443,10 +457,11 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt, sent_Bps = sentBps(s); recvd_Bps = recvdBps(s); } - fprintf(stdout, - "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n", - s.bytes_sent, sent_Bps, sent_Bps/giga, - s.bytes_recvd, recvd_Bps, recvd_Bps/giga); + if (opt->verbose) + fprintf(stdout, + "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n", + s.bytes_sent, sent_Bps, sent_Bps/giga, + s.bytes_recvd, recvd_Bps, recvd_Bps/giga); exit(1); } else if (rxpid == -1) { perror("msg_loop_rx: "); @@ -470,10 +485,11 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt, sent_Bps = sentBps(s); recvd_Bps = recvdBps(s); } - fprintf(stdout, - "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n", - s.bytes_sent, sent_Bps, sent_Bps/giga, - s.bytes_recvd, recvd_Bps, recvd_Bps/giga); + if (opt->verbose) + fprintf(stdout, + "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n", + s.bytes_sent, sent_Bps, sent_Bps/giga, + s.bytes_recvd, recvd_Bps, recvd_Bps/giga); exit(1); } else if (txpid == -1) { perror("msg_loop_tx: "); @@ -568,102 +584,9 @@ enum { SENDPAGE, }; -int main(int argc, char **argv) +static int run_options(struct sockmap_options *options, int cg_fd, int test) { - int iov_count = 1, length = 1024, rate = 1, tx_prog_fd; - struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; - int opt, longindex, err, cg_fd = 0; - struct sockmap_options options = {0}; - int test = PING_PONG; - char filename[256]; - - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:", - long_options, &longindex)) != -1) { - switch (opt) { - case 's': - txmsg_start = atoi(optarg); - break; - case 'e': - txmsg_end = atoi(optarg); - break; - case 'a': - txmsg_apply = atoi(optarg); - break; - case 'k': - txmsg_cork = atoi(optarg); - break; - case 'c': - cg_fd = open(optarg, O_DIRECTORY, O_RDONLY); - if (cg_fd < 0) { - fprintf(stderr, - "ERROR: (%i) open cg path failed: %s\n", - cg_fd, optarg); - return cg_fd; - } - break; - case 'r': - rate = atoi(optarg); - break; - case 'v': - options.verbose = 1; - break; - case 'i': - iov_count = atoi(optarg); - break; - case 'l': - length = atoi(optarg); - break; - case 'd': - options.data_test = true; - break; - case 't': - if (strcmp(optarg, "ping") == 0) { - test = PING_PONG; - } else if (strcmp(optarg, "sendmsg") 
== 0) { - test = SENDMSG; - } else if (strcmp(optarg, "base") == 0) { - test = BASE; - } else if (strcmp(optarg, "base_sendpage") == 0) { - test = BASE_SENDPAGE; - } else if (strcmp(optarg, "sendpage") == 0) { - test = SENDPAGE; - } else { - usage(argv); - return -1; - } - break; - case 0: - break; - case 'h': - default: - usage(argv); - return -1; - } - } - - if (!cg_fd) { - fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n", - argv[0]); - return -1; - } - - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - running = 1; - - /* catch SIGINT */ - signal(SIGINT, running_handler); - - if (load_bpf_file(filename)) { - fprintf(stderr, "load_bpf_file: (%s) %s\n", - filename, strerror(errno)); - return 1; - } + int i, key, next_key, err, tx_prog_fd = -1, zero = 0; /* If base test skip BPF setup */ if (test == BASE || test == BASE_SENDPAGE) @@ -673,8 +596,9 @@ int main(int argc, char **argv) err = bpf_prog_attach(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER, 0); if (err) { - fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", - err, strerror(errno)); + fprintf(stderr, + "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[0], err, strerror(errno)); return err; } @@ -695,7 +619,7 @@ int main(int argc, char **argv) } run: - err = sockmap_init_sockets(); + err = sockmap_init_sockets(options->verbose); if (err) { fprintf(stderr, "ERROR: test socket failed: %d\n", err); goto out; @@ -729,7 +653,7 @@ run: fprintf(stderr, "ERROR: bpf_prog_attach (txmsg): %d (%s)\n", err, strerror(errno)); - return err; + goto out; } err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY); @@ -737,7 +661,7 @@ run: fprintf(stderr, "ERROR: bpf_map_update_elem (txmsg): %d (%s\n", err, strerror(errno)); - return err; + goto out; } if (txmsg_redir || txmsg_redir_noisy) @@ -750,7 +674,7 @@ run: fprintf(stderr, "ERROR: bpf_map_update_elem (txmsg): %d (%s\n", err, strerror(errno)); - return err; + goto out; } if (txmsg_apply) { @@ -760,7 +684,7 @@ run: fprintf(stderr, "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n", err, strerror(errno)); - return err; + goto out; } } @@ -771,7 +695,7 @@ run: fprintf(stderr, "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n", err, strerror(errno)); - return err; + goto out; } } @@ -782,7 +706,7 @@ run: fprintf(stderr, "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n", err, strerror(errno)); - return err; + goto out; } } @@ -794,7 +718,7 @@ run: fprintf(stderr, "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n", err, strerror(errno)); - return err; + goto out; } } @@ -832,11 +756,13 @@ run: } if (txmsg_skb) { - int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1; + int skb_fd = (test == SENDMSG || test == SENDPAGE) ? 
+ p2 : p1; int ingress = BPF_F_INGRESS; i = 0; - err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY); + err = bpf_map_update_elem(map_fd[7], + &i, &ingress, BPF_ANY); if (err) { fprintf(stderr, "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n", @@ -844,7 +770,8 @@ run: } i = 3; - err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY); + err = bpf_map_update_elem(map_fd[0], + &i, &skb_fd, BPF_ANY); if (err) { fprintf(stderr, "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n", @@ -854,36 +781,679 @@ run: } if (txmsg_drop) - options.drop_expected = true; + options->drop_expected = true; if (test == PING_PONG) - err = forever_ping_pong(rate, &options); + err = forever_ping_pong(options->rate, options); else if (test == SENDMSG) { - options.base = false; - options.sendpage = false; - err = sendmsg_test(iov_count, length, rate, &options); + options->base = false; + options->sendpage = false; + err = sendmsg_test(options); } else if (test == SENDPAGE) { - options.base = false; - options.sendpage = true; - err = sendmsg_test(iov_count, length, rate, &options); + options->base = false; + options->sendpage = true; + err = sendmsg_test(options); } else if (test == BASE) { - options.base = true; - options.sendpage = false; - err = sendmsg_test(iov_count, length, rate, &options); + options->base = true; + options->sendpage = false; + err = sendmsg_test(options); } else if (test == BASE_SENDPAGE) { - options.base = true; - options.sendpage = true; - err = sendmsg_test(iov_count, length, rate, &options); + options->base = true; + options->sendpage = true; + err = sendmsg_test(options); } else fprintf(stderr, "unknown test\n"); out: + /* Detach and zero all the maps */ bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS); + bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER); + bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT); + if (tx_prog_fd >= 0) + bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT); + + for (i = 0; i < 8; i++) { + key = next_key = 0; + bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY); + while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) { + bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY); + key = next_key; + } + } + close(s1); close(s2); close(p1); close(p2); close(c1); close(c2); + return err; +} + +static char *test_to_str(int test) +{ + switch (test) { + case SENDMSG: + return "sendmsg"; + case SENDPAGE: + return "sendpage"; + } + return "unknown"; +} + +#define OPTSTRING 60 +static void test_options(char *options) +{ + memset(options, 0, OPTSTRING); + + if (txmsg_pass) + strncat(options, "pass,", OPTSTRING); + if (txmsg_noisy) + strncat(options, "pass_noisy,", OPTSTRING); + if (txmsg_redir) + strncat(options, "redir,", OPTSTRING); + if (txmsg_redir_noisy) + strncat(options, "redir_noisy,", OPTSTRING); + if (txmsg_drop) + strncat(options, "drop,", OPTSTRING); + if (txmsg_apply) + strncat(options, "apply,", OPTSTRING); + if (txmsg_cork) + strncat(options, "cork,", OPTSTRING); + if (txmsg_start) + strncat(options, "start,", OPTSTRING); + if (txmsg_end) + strncat(options, "end,", OPTSTRING); + if (txmsg_ingress) + strncat(options, "ingress,", OPTSTRING); + if (txmsg_skb) + strncat(options, "skb,", OPTSTRING); +} + +static int __test_exec(int cgrp, int test, struct sockmap_options *opt) +{ + char *options = calloc(OPTSTRING, sizeof(char)); + int err; + + if (test == SENDPAGE) + opt->sendpage = true; + else + opt->sendpage = false; + + if (txmsg_drop) + opt->drop_expected = true; + else + opt->drop_expected =
false; + + test_options(options); + + fprintf(stdout, + "[TEST %i]: (%i, %i, %i, %s, %s): ", + test_cnt, opt->rate, opt->iov_count, opt->iov_length, + test_to_str(test), options); + fflush(stdout); + err = run_options(opt, cgrp, test); + fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED"); + test_cnt++; + !err ? passed++ : failed++; + free(options); + return err; +} + +static int test_exec(int cgrp, struct sockmap_options *opt) +{ + int err = __test_exec(cgrp, SENDMSG, opt); + + if (err) + goto out; + + err = __test_exec(cgrp, SENDPAGE, opt); +out: + return err; +} + +static int test_loop(int cgrp) +{ + struct sockmap_options opt; + + int err, i, l, r; + + opt.verbose = 0; + opt.base = false; + opt.sendpage = false; + opt.data_test = false; + opt.drop_expected = false; + opt.iov_count = 0; + opt.iov_length = 0; + opt.rate = 0; + + r = 1; + for (i = 1; i < 100; i += 33) { + for (l = 1; l < 100; l += 33) { + opt.rate = r; + opt.iov_count = i; + opt.iov_length = l; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + } + sched_yield(); +out: + return err; +} + +static int test_txmsg(int cgrp) +{ + int err; + + txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; + txmsg_apply = txmsg_cork = 0; + txmsg_ingress = txmsg_skb = 0; + + txmsg_pass = 1; + err = test_loop(cgrp); + txmsg_pass = 0; + if (err) + goto out; + + txmsg_redir = 1; + err = test_loop(cgrp); + txmsg_redir = 0; + if (err) + goto out; + + txmsg_drop = 1; + err = test_loop(cgrp); + txmsg_drop = 0; + if (err) + goto out; + + txmsg_redir = 1; + txmsg_ingress = 1; + err = test_loop(cgrp); + txmsg_redir = 0; + txmsg_ingress = 0; + if (err) + goto out; +out: + txmsg_pass = 0; + txmsg_redir = 0; + txmsg_drop = 0; + return err; +} + +static int test_send(struct sockmap_options *opt, int cgrp) +{ + int err; + + opt->iov_length = 1; + opt->iov_count = 1; + opt->rate = 1; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->iov_length = 1; + opt->iov_count = 1024; + opt->rate = 1; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->iov_length = 1024; + opt->iov_count = 1; + opt->rate = 1; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->iov_length = 1; + opt->iov_count = 1; + opt->rate = 1024; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->iov_length = 256; + opt->iov_count = 1024; + opt->rate = 10; + err = test_exec(cgrp, opt); + if (err) + goto out; + + opt->rate = 100; + opt->iov_count = 1; + opt->iov_length = 5; + err = test_exec(cgrp, opt); + if (err) + goto out; +out: + sched_yield(); + return err; +} + +static int test_mixed(int cgrp) +{ + struct sockmap_options opt = {0}; + int err; + + txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; + txmsg_apply = txmsg_cork = 0; + txmsg_start = txmsg_end = 0; + /* Test small and large iov_count values with pass/redir/apply/cork */ + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1; + txmsg_cork = 0; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 0; + txmsg_cork = 1; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1; + txmsg_cork = 1; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 1024; + txmsg_cork = 0; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_apply = 0; + txmsg_cork = 1024; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + 
txmsg_apply = 1024; + txmsg_cork = 1024; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 1; + txmsg_redir = 0; + txmsg_cork = 4096; + txmsg_apply = 4096; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 1; + txmsg_cork = 0; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 0; + txmsg_cork = 1; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 1024; + txmsg_cork = 0; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 0; + txmsg_cork = 1024; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_apply = 1024; + txmsg_cork = 1024; + err = test_send(&opt, cgrp); + if (err) + goto out; + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_cork = 4096; + txmsg_apply = 4096; + err = test_send(&opt, cgrp); + if (err) + goto out; +out: + return err; +} + +static int test_start_end(int cgrp) +{ + struct sockmap_options opt = {0}; + int err, i; + + /* Test basic start/end with lots of iov_count and iov_lengths */ + txmsg_start = 1; + txmsg_end = 2; + err = test_txmsg(cgrp); + if (err) + goto out; + + /* Test start/end with cork */ + opt.rate = 16; + opt.iov_count = 1; + opt.iov_length = 100; + txmsg_cork = 1600; + + for (i = 99; i <= 1600; i += 500) { + txmsg_start = 0; + txmsg_end = i; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + + /* Test start/end with cork but pull data in middle */ + for (i = 199; i <= 1600; i += 500) { + txmsg_start = 100; + txmsg_end = i; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + + /* Test start/end with cork pulling last sg entry */ + txmsg_start = 1500; + txmsg_end = 1600; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test start/end pull of single byte in last page */ + txmsg_start = 1111; + txmsg_end = 1112; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test start/end with end < start */ + txmsg_start = 1111; + txmsg_end = 0; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test start/end with end > data */ + txmsg_start = 0; + txmsg_end = 1601; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test start/end with start > data */ + txmsg_start = 1601; + txmsg_end = 1600; + err = test_exec(cgrp, &opt); + +out: + txmsg_start = 0; + txmsg_end = 0; + sched_yield(); + return err; +} + +char *map_names[] = { + "sock_map", + "sock_map_txmsg", + "sock_map_redir", + "sock_apply_bytes", + "sock_cork_bytes", + "sock_pull_bytes", + "sock_redir_flags", + "sock_skb_opts", +}; + +int prog_attach_type[] = { + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_SOCK_OPS, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, + BPF_SK_MSG_VERDICT, +}; + +int prog_type[] = { + BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_SK_MSG, +}; + +static int populate_progs(void) +{ + char *bpf_file = BPF_FILENAME; + struct bpf_program *prog; + struct bpf_object *obj; + int i = 0; + long err; + + obj = bpf_object__open(bpf_file); + err = libbpf_get_error(obj); + if (err) { + char err_buf[256]; + + libbpf_strerror(err, err_buf, sizeof(err_buf)); + 
printf("Unable to load eBPF objects in file '%s': %s\n", + bpf_file, err_buf); + return -1; + } + + bpf_object__for_each_program(prog, obj) { + bpf_program__set_type(prog, prog_type[i]); + bpf_program__set_expected_attach_type(prog, + prog_attach_type[i]); + i++; + } + + i = bpf_object__load(obj); + i = 0; + bpf_object__for_each_program(prog, obj) { + prog_fd[i] = bpf_program__fd(prog); + i++; + } + + for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) { + maps[i] = bpf_object__find_map_by_name(obj, map_names[i]); + map_fd[i] = bpf_map__fd(maps[i]); + if (map_fd[i] < 0) { + fprintf(stderr, "load_bpf_file: (%i) %s\n", + map_fd[i], strerror(errno)); + return -1; + } + } + + return 0; +} + +static int test_suite(void) +{ + int cg_fd, err; + + err = populate_progs(); + if (err < 0) { + fprintf(stderr, "ERROR: (%i) load bpf failed\n", err); + return err; + } + + if (setup_cgroup_environment()) { + fprintf(stderr, "ERROR: cgroup env failed\n"); + return -EINVAL; + } + + cg_fd = create_and_get_cgroup(CG_PATH); + if (cg_fd < 0) { + fprintf(stderr, + "ERROR: (%i) open cg path failed: %s\n", + cg_fd, CG_PATH); + return cg_fd; + } + + /* Tests basic commands and APIs with range of iov values */ + txmsg_start = txmsg_end = 0; + err = test_txmsg(cg_fd); + if (err) + goto out; + + /* Tests interesting combinations of APIs used together */ + err = test_mixed(cg_fd); + if (err) + goto out; + + /* Tests pull_data API using start/end API */ + err = test_start_end(cg_fd); + if (err) + goto out; + +out: + printf("Summary: %i PASSED %i FAILED\n", passed, failed); + close(cg_fd); + return err; +} + +int main(int argc, char **argv) +{ + struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; + int iov_count = 1, length = 1024, rate = 1; + struct sockmap_options options = {0}; + int opt, longindex, err, cg_fd = 0; + char *bpf_file = BPF_FILENAME; + int test = PING_PONG; + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + + if (argc < 2) + return test_suite(); + + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:", + long_options, &longindex)) != -1) { + switch (opt) { + case 's': + txmsg_start = atoi(optarg); + break; + case 'e': + txmsg_end = atoi(optarg); + break; + case 'a': + txmsg_apply = atoi(optarg); + break; + case 'k': + txmsg_cork = atoi(optarg); + break; + case 'c': + cg_fd = open(optarg, O_DIRECTORY, O_RDONLY); + if (cg_fd < 0) { + fprintf(stderr, + "ERROR: (%i) open cg path failed: %s\n", + cg_fd, optarg); + return cg_fd; + } + break; + case 'r': + rate = atoi(optarg); + break; + case 'v': + options.verbose = 1; + break; + case 'i': + iov_count = atoi(optarg); + break; + case 'l': + length = atoi(optarg); + break; + case 'd': + options.data_test = true; + break; + case 't': + if (strcmp(optarg, "ping") == 0) { + test = PING_PONG; + } else if (strcmp(optarg, "sendmsg") == 0) { + test = SENDMSG; + } else if (strcmp(optarg, "base") == 0) { + test = BASE; + } else if (strcmp(optarg, "base_sendpage") == 0) { + test = BASE_SENDPAGE; + } else if (strcmp(optarg, "sendpage") == 0) { + test = SENDPAGE; + } else { + usage(argv); + return -1; + } + break; + case 0: + break; + case 'h': + default: + usage(argv); + return -1; + } + } + + if (!cg_fd) { + fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n", + argv[0]); + return -1; + } + + err = populate_progs(); + if (err) { + fprintf(stderr, "populate program: (%s) %s\n", + bpf_file, strerror(errno)); + return 1; + } + running = 1; + + /* catch SIGINT */ + signal(SIGINT, running_handler); + + options.iov_count
= iov_count; + options.iov_length = length; + options.rate = rate; + + err = run_options(&options, cg_fd, test); close(cg_fd); return err; } diff --git a/samples/sockmap/sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c index 9ff8bc5dc206..33de97e2b6b6 100644 --- a/samples/sockmap/sockmap_kern.c +++ b/tools/testing/selftests/bpf/test_sockmap_kern.c @@ -1,20 +1,19 @@ -/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/pkt_cls.h> +#include <sys/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" /* Sockmap sample program connects a client and a backend together * using cgroups. @@ -337,5 +336,5 @@ int bpf_prog10(struct sk_msg_md *msg) return SK_DROP; } - +int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh new file mode 100755 index 000000000000..aeb2901f21f4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -0,0 +1,729 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# End-to-end eBPF tunnel test suite +# The script tests the BPF network tunnel implementation. +# +# Topology: +# --------- +# root namespace | at_ns0 namespace +# | +# ----------- | ----------- +# | tnl dev | | | tnl dev | (overlay network) +# ----------- | ----------- +# metadata-mode | native-mode +# with bpf | +# | +# ---------- | ---------- +# | veth1 | --------- | veth0 | (underlay network) +# ---------- peer ---------- +# +# +# Device Configuration +# -------------------- +# Root namespace with metadata-mode tunnel + BPF +# Device names and addresses: +# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay) +# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay) +# +# Namespace at_ns0 with native tunnel +# Device names and addresses: +# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) +# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay) +# +# +# End-to-end ping packet flow +# --------------------------- +# Most of the tests start with namespace creation and device configuration, +# then ping the underlay and overlay networks. When doing 'ping 10.1.1.100' +# from the root namespace, the following operations happen: +# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev. +# 2) Tnl device's egress BPF program is triggered and sets the tunnel metadata, +# with remote_ip=172.16.1.100 and others.
+# 3) The outer tunnel header is prepended and the packet is routed to veth1's egress +# 4) veth0's ingress queue receives the tunneled packet in namespace at_ns0 +# 5) The tunnel protocol handler, e.g. vxlan_rcv, decaps the packet +# 6) The packet is forwarded to the overlay tnl dev + +PING_ARG="-c 3 -w 10 -q" +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +config_device() +{ + ip netns add at_ns0 + ip link add veth0 type veth peer name veth1 + ip link set veth0 netns at_ns0 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip link set dev veth1 up mtu 1500 + ip addr add dev veth1 172.16.1.200/24 +} + +add_gre_tunnel() +{ + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # root namespace + ip link add dev $DEV type $TYPE key 2 external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +add_ip6gretap_tunnel() +{ + + # assign ipv6 address + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \ + local ::11 remote ::22 + + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip addr add dev $DEV fc80::200/24 + ip link set dev $DEV up +} + +add_erspan_tunnel() +{ + # at_ns0 namespace + if [ "$1" == "v1" ]; then + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 1 erspan 123 + else + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 2 erspan_dir egress erspan_hwid 3 + fi + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # root namespace + ip link add dev $DEV type $TYPE external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +add_ip6erspan_tunnel() +{ + + # assign ipv6 address + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + if [ "$1" == "v1" ]; then + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local ::11 remote ::22 \ + erspan_ver 1 erspan 123 + else + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE seq key 2 \ + local ::11 remote ::22 \ + erspan_ver 2 erspan_dir egress erspan_hwid 7 + fi + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip link set dev $DEV up +} + +add_vxlan_tunnel() +{ + # Set static ARP entries here because iptables set-mark works + # on L3 packets and therefore does not apply to ARP packets, + # causing errors in get_tunnel_{key/opt}.
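+	# A rough debugging aid (not used by the test itself): "ip netns exec + # at_ns0 iptables -L OUTPUT -v" should list the mark rule added below, + # while ARP frames bypass it and rely on the static neighbour entries.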
+ + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE \ + id 2 dstport 4789 gbp remote 172.16.1.200 + ip netns exec at_ns0 \ + ip link set dev $DEV_NS address 52:54:00:d9:01:00 up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00 + ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF + + # root namespace + ip link add dev $DEV type $TYPE external gbp dstport 4789 + ip link set dev $DEV address 52:54:00:d9:02:00 up + ip addr add dev $DEV 10.1.1.200/24 + arp -s 10.1.1.100 52:54:00:d9:01:00 +} + +add_ip6vxlan_tunnel() +{ + #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0 + ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + #ip -4 addr del 172.16.1.200 dev veth1 + ip -6 addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \ + local ::11 remote ::22 + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external dstport 4789 + ip addr add dev $DEV 10.1.1.200/24 + ip link set dev $DEV up +} + +add_geneve_tunnel() +{ + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE \ + id 2 dstport 6081 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # root namespace + ip link add dev $DEV type $TYPE dstport 6081 external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +add_ip6geneve_tunnel() +{ + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE id 22 \ + remote ::22 # geneve has no local option + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip link set dev $DEV up +} + +add_ipip_tunnel() +{ + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE \ + local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # root namespace + ip link add dev $DEV type $TYPE external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +add_ipip6tnl_tunnel() +{ + ip netns exec at_ns0 ip addr add ::11/96 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add dev veth1 ::22/96 + ip link set dev veth1 up + + # at_ns0 namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE \ + local ::11 remote ::22 + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip link set dev $DEV_NS up + + # root namespace + ip link add dev $DEV type $TYPE external + ip addr add dev $DEV 10.1.1.200/24 + ip link set dev $DEV up +} + +test_gre() +{ + TYPE=gretap + DEV_NS=gretap00 + DEV=gretap11 + ret=0 + + check $TYPE + config_device + add_gre_tunnel + attach_bpf $DEV gre_set_tunnel gre_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? 
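+	# For this test, the attach_bpf call above (defined later in this file) + # expands to roughly: + # tc qdisc add dev gretap11 clsact + # tc filter add dev gretap11 egress bpf da obj test_tunnel_kern.o sec gre_set_tunnel + # tc filter add dev gretap11 ingress bpf da obj test_tunnel_kern.o sec gre_get_tunnel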
+ cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6gre() +{ + TYPE=ip6gre + DEV_NS=ip6gre00 + DEV=ip6gre11 + ret=0 + + check $TYPE + config_device + # reuse the ip6gretap function + add_ip6gretap_tunnel + attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # overlay: ipv4 over ipv6 + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + ping $PING_ARG 10.1.1.100 + check_err $? + # overlay: ipv6 over ipv6 + ip netns exec at_ns0 ping6 $PING_ARG fc80::200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6gretap() +{ + TYPE=ip6gretap + DEV_NS=ip6gretap00 + DEV=ip6gretap11 + ret=0 + + check $TYPE + config_device + add_ip6gretap_tunnel + attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # overlay: ipv4 over ipv6 + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + ping $PING_ARG 10.1.1.100 + check_err $? + # overlay: ipv6 over ipv6 + ip netns exec at_ns0 ping6 $PING_ARG fc80::200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_erspan() +{ + TYPE=erspan + DEV_NS=erspan00 + DEV=erspan11 + ret=0 + + check $TYPE + config_device + add_erspan_tunnel $1 + attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6erspan() +{ + TYPE=ip6erspan + DEV_NS=ip6erspan00 + DEV=ip6erspan11 + ret=0 + + check $TYPE + config_device + add_ip6erspan_tunnel $1 + attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel + ping6 $PING_ARG ::11 + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_vxlan() +{ + TYPE=vxlan + DEV_NS=vxlan00 + DEV=vxlan11 + ret=0 + + check $TYPE + config_device + add_vxlan_tunnel + attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6vxlan() +{ + TYPE=vxlan + DEV_NS=ip6vxlan00 + DEV=ip6vxlan11 + ret=0 + + check $TYPE + config_device + add_ip6vxlan_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # ip4 over ip6 + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: ip6$TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: ip6$TYPE"${NC} +} + +test_geneve() +{ + TYPE=geneve + DEV_NS=geneve00 + DEV=geneve11 + ret=0 + + check $TYPE + config_device + add_geneve_tunnel + attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? 
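+	# The set/get programs log via bpf_trace_printk(); one way to watch + # their output during the pings is: + # cat /sys/kernel/debug/tracing/trace_pipe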
+ cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ip6geneve() +{ + TYPE=geneve + DEV_NS=ip6geneve00 + DEV=ip6geneve11 + ret=0 + + check $TYPE + config_device + add_ip6geneve_tunnel + attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: ip6$TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: ip6$TYPE"${NC} +} + +test_ipip() +{ + TYPE=ipip + DEV_NS=ipip00 + DEV=ipip11 + ret=0 + + check $TYPE + config_device + add_ipip_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +test_ipip6() +{ + TYPE=ip6tnl + DEV_NS=ipip6tnl00 + DEV=ipip6tnl11 + ret=0 + + check $TYPE + config_device + add_ipip6tnl_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # ip4 over ip6 + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + +setup_xfrm_tunnel() +{ + auth=0x$(printf '1%.0s' {1..40}) + enc=0x$(printf '2%.0s' {1..32}) + spi_in_to_out=0x1 + spi_out_to_in=0x2 + # at_ns0 namespace + # at_ns0 -> root + ip netns exec at_ns0 \ + ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ + spi $spi_in_to_out reqid 1 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip netns exec at_ns0 \ + ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ + tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ + mode tunnel + # root -> at_ns0 + ip netns exec at_ns0 \ + ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ + spi $spi_out_to_in reqid 2 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip netns exec at_ns0 \ + ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ + tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ + mode tunnel + # address & route + ip netns exec at_ns0 \ + ip addr add dev veth0 10.1.1.100/32 + ip netns exec at_ns0 \ + ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ + src 10.1.1.100 + + # root namespace + # at_ns0 -> root + ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ + spi $spi_in_to_out reqid 1 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ + tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ + mode tunnel + # root -> at_ns0 + ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ + spi $spi_out_to_in reqid 2 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ + tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ + mode tunnel + # address & route + ip addr add dev veth1 10.1.1.200/32 + ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 +} + +test_xfrm_tunnel() +{ + config_device + #tcpdump -nei veth1 ip & + output=$(mktemp) + cat /sys/kernel/debug/tracing/trace_pipe | tee $output & + 
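+	# The greps below match the xfrm_get_state output format from + # test_tunnel_kern.c: "reqid %d spi 0x%x remote ip 0x%x"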
setup_xfrm_tunnel + tc qdisc add dev veth1 clsact + tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \ + sec xfrm_get_state + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + sleep 1 + grep "reqid 1" $output + check_err $? + grep "spi 0x1" $output + check_err $? + grep "remote ip 0xac100164" $output + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: xfrm tunnel"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: xfrm tunnel"${NC} +} + +attach_bpf() +{ + DEV=$1 + SET=$2 + GET=$3 + tc qdisc add dev $DEV clsact + tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET + tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET +} + +cleanup() +{ + ip netns delete at_ns0 2> /dev/null + ip link del veth1 2> /dev/null + ip link del ipip11 2> /dev/null + ip link del ipip6tnl11 2> /dev/null + ip link del gretap11 2> /dev/null + ip link del ip6gre11 2> /dev/null + ip link del ip6gretap11 2> /dev/null + ip link del vxlan11 2> /dev/null + ip link del ip6vxlan11 2> /dev/null + ip link del geneve11 2> /dev/null + ip link del ip6geneve11 2> /dev/null + ip link del erspan11 2> /dev/null + ip link del ip6erspan11 2> /dev/null +} + +cleanup_exit() +{ + echo "Caught SIGKILL or SIGINT, cleaning up and exiting" + cleanup + exit 0 +} + +check() +{ + ip link help $1 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ]; then + echo "SKIP $1: iproute2 does not support it" + cleanup + return 1 + fi +} + +enable_debug() +{ + echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control + echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control + echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control + echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control + echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control +} + +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + +bpf_tunnel_test() +{ + echo "Testing GRE tunnel..." + test_gre + echo "Testing IP6GRE tunnel..." + test_ip6gre + echo "Testing IP6GRETAP tunnel..." + test_ip6gretap + echo "Testing ERSPAN tunnel..." + test_erspan v2 + echo "Testing IP6ERSPAN tunnel..." + test_ip6erspan v2 + echo "Testing VXLAN tunnel..." + test_vxlan + echo "Testing IP6VXLAN tunnel..." + test_ip6vxlan + echo "Testing GENEVE tunnel..." + test_geneve + echo "Testing IP6GENEVE tunnel..." + test_ip6geneve + echo "Testing IPIP tunnel..." + test_ipip + echo "Testing IPIP6 tunnel..." + test_ipip6 + echo "Testing IPSec tunnel..." + test_xfrm_tunnel +} + +trap cleanup 0 3 6 +trap cleanup_exit 2 9 + +cleanup +bpf_tunnel_test + +exit 0 diff --git a/samples/bpf/tcbpf2_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c index 9a8db7bd6db4..504df69c83df 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/tools/testing/selftests/bpf/test_tunnel_kern.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2016 VMware * Copyright (c) 2016 Facebook * @@ -5,39 +6,41 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation.
*/ -#define KBUILD_MODNAME "foo" -#include <uapi/linux/bpf.h> -#include <uapi/linux/if_ether.h> -#include <uapi/linux/if_packet.h> -#include <uapi/linux/ip.h> -#include <uapi/linux/ipv6.h> -#include <uapi/linux/in.h> -#include <uapi/linux/tcp.h> -#include <uapi/linux/filter.h> -#include <uapi/linux/pkt_cls.h> -#include <uapi/linux/erspan.h> -#include <net/ipv6.h> +#include <stddef.h> +#include <string.h> +#include <arpa/inet.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <linux/tcp.h> +#include <linux/socket.h> +#include <linux/pkt_cls.h> +#include <linux/erspan.h> #include "bpf_helpers.h" #include "bpf_endian.h" -#define _htonl __builtin_bswap32 #define ERROR(ret) do {\ char fmt[] = "ERROR line:%d ret:%d\n";\ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ - } while(0) + } while (0) + +int _version SEC("version") = 1; struct geneve_opt { __be16 opt_class; - u8 type; - u8 length:5; - u8 r3:1; - u8 r2:1; - u8 r1:1; - u8 opt_data[8]; /* hard-coded to 8 byte */ + __u8 type; + __u8 length:5; + __u8 r3:1; + __u8 r2:1; + __u8 r1:1; + __u8 opt_data[8]; /* hard-coded to 8 byte */ }; struct vxlan_metadata { - u32 gbp; + __u32 gbp; }; SEC("gre_set_tunnel") @@ -86,7 +89,7 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb) int ret; __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv6[3] = _htonl(0x11); /* ::11 */ + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ key.tunnel_id = 2; key.tunnel_tos = 0; key.tunnel_ttl = 64; @@ -136,7 +139,8 @@ int _erspan_set_tunnel(struct __sk_buff *skb) key.tunnel_tos = 0; key.tunnel_ttl = 64; - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); if (ret < 0) { ERROR(ret); return TC_ACT_SHOT; @@ -147,8 +151,8 @@ int _erspan_set_tunnel(struct __sk_buff *skb) md.version = 1; md.u.index = bpf_htonl(123); #else - u8 direction = 1; - u8 hwid = 7; + __u8 direction = 1; + __u8 hwid = 7; md.version = 2; md.u.md2.dir = direction; @@ -171,7 +175,7 @@ int _erspan_get_tunnel(struct __sk_buff *skb) char fmt[] = "key %d remote ip 0x%x erspan version %d\n"; struct bpf_tunnel_key key; struct erspan_metadata md; - u32 index; + __u32 index; int ret; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); @@ -214,7 +218,7 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) int ret; __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv6[3] = _htonl(0x11); + key.remote_ipv6[3] = bpf_htonl(0x11); key.tunnel_id = 2; key.tunnel_tos = 0; key.tunnel_ttl = 64; @@ -229,11 +233,11 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) __builtin_memset(&md, 0, sizeof(md)); #ifdef ERSPAN_V1 - md.u.index = htonl(123); + md.u.index = bpf_htonl(123); md.version = 1; #else - u8 direction = 0; - u8 hwid = 17; + __u8 direction = 0; + __u8 hwid = 17; md.version = 2; md.u.md2.dir = direction; @@ -256,10 +260,11 @@ int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb) char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n"; struct bpf_tunnel_key key; struct erspan_metadata md; - u32 index; + __u32 index; int ret; - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); if (ret < 0) { ERROR(ret); return TC_ACT_SHOT; @@ -304,7 +309,8 @@ int _vxlan_set_tunnel(struct __sk_buff *skb) key.tunnel_tos = 0; key.tunnel_ttl = 64; - ret = bpf_skb_set_tunnel_key(skb, 
&key, sizeof(key), BPF_F_ZERO_CSUM_TX); + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); if (ret < 0) { ERROR(ret); return TC_ACT_SHOT; @@ -346,6 +352,48 @@ int _vxlan_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("ip6vxlan_set_tunnel") +int _ip6vxlan_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_id = 22; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6vxlan_get_tunnel") +int _ip6vxlan_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip6 ::%x label %x\n"; + struct bpf_tunnel_key key; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + + return TC_ACT_OK; +} + SEC("geneve_set_tunnel") int _geneve_set_tunnel(struct __sk_buff *skb) { @@ -360,15 +408,16 @@ int _geneve_set_tunnel(struct __sk_buff *skb) key.tunnel_ttl = 64; __builtin_memset(&gopt, 0x0, sizeof(gopt)); - gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */ + gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ gopt.type = 0x08; gopt.r1 = 0; gopt.r2 = 0; gopt.r3 = 0; gopt.length = 2; /* 4-byte multiple */ - *(int *) &gopt.opt_data = 0xdeadbeef; + *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef); - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); if (ret < 0) { ERROR(ret); return TC_ACT_SHOT; @@ -408,6 +457,71 @@ int _geneve_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("ip6geneve_set_tunnel") +int _ip6geneve_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + struct geneve_opt gopt; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_id = 22; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&gopt, 0x0, sizeof(gopt)); + gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ + gopt.type = 0x08; + gopt.r1 = 0; + gopt.r2 = 0; + gopt.r3 = 0; + gopt.length = 2; /* 4-byte multiple */ + *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef); + + ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6geneve_get_tunnel") +int _ip6geneve_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, gopt.opt_class); + + return TC_ACT_OK; +} + SEC("ipip_set_tunnel") int _ipip_set_tunnel(struct __sk_buff *skb) { @@ -431,9 +545,9 @@ int _ipip_set_tunnel(struct __sk_buff *skb) if (iph->protocol != 
IPPROTO_TCP || iph->ihl != 5) return TC_ACT_SHOT; - if (tcp->dest == htons(5200)) + if (tcp->dest == bpf_htons(5200)) key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - else if (tcp->dest == htons(5201)) + else if (tcp->dest == bpf_htons(5201)) key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */ else return TC_ACT_SHOT; @@ -481,28 +595,12 @@ int _ipip6_set_tunnel(struct __sk_buff *skb) return TC_ACT_SHOT; } - key.remote_ipv6[0] = _htonl(0x2401db00); + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ key.tunnel_ttl = 64; - if (iph->protocol == IPPROTO_ICMP) { - key.remote_ipv6[3] = _htonl(1); - } else { - if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) { - ERROR(iph->protocol); - return TC_ACT_SHOT; - } - - if (tcp->dest == htons(5200)) { - key.remote_ipv6[3] = _htonl(1); - } else if (tcp->dest == htons(5201)) { - key.remote_ipv6[3] = _htonl(2); - } else { - ERROR(tcp->dest); - return TC_ACT_SHOT; - } - } - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); if (ret < 0) { ERROR(ret); return TC_ACT_SHOT; @@ -518,14 +616,15 @@ int _ipip6_get_tunnel(struct __sk_buff *skb) struct bpf_tunnel_key key; char fmt[] = "remote ip6 %x::%x\n"; - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); if (ret < 0) { ERROR(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), - _htonl(key.remote_ipv6[3])); + bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); return TC_ACT_OK; } @@ -545,28 +644,29 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb) return TC_ACT_SHOT; } - key.remote_ipv6[0] = _htonl(0x2401db00); + key.remote_ipv6[0] = bpf_htonl(0x2401db00); key.tunnel_ttl = 64; - if (iph->nexthdr == NEXTHDR_ICMP) { - key.remote_ipv6[3] = _htonl(1); + if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) { + key.remote_ipv6[3] = bpf_htonl(1); } else { - if (iph->nexthdr != NEXTHDR_TCP) { + if (iph->nexthdr != 6 /* NEXTHDR_TCP */) { ERROR(iph->nexthdr); return TC_ACT_SHOT; } - if (tcp->dest == htons(5200)) { - key.remote_ipv6[3] = _htonl(1); - } else if (tcp->dest == htons(5201)) { - key.remote_ipv6[3] = _htonl(2); + if (tcp->dest == bpf_htons(5200)) { + key.remote_ipv6[3] = bpf_htonl(1); + } else if (tcp->dest == bpf_htons(5201)) { + key.remote_ipv6[3] = bpf_htonl(2); } else { ERROR(tcp->dest); return TC_ACT_SHOT; } } - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); if (ret < 0) { ERROR(ret); return TC_ACT_SHOT; @@ -582,14 +682,31 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb) struct bpf_tunnel_key key; char fmt[] = "remote ip6 %x::%x\n"; - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); if (ret < 0) { ERROR(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), - _htonl(key.remote_ipv6[3])); + bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + +SEC("xfrm_get_state") +int _xfrm_get_state(struct __sk_buff *skb) +{ + struct bpf_xfrm_state x; + char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n"; + int ret; + + ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0); + if (ret < 0) + 
return TC_ACT_OK; + + bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi), + bpf_ntohl(x.remote_ipv4)); return TC_ACT_OK; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3e7718b1a9ae..165e9ddfa446 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -64,6 +64,7 @@ struct bpf_test { struct bpf_insn insns[MAX_INSNS]; int fixup_map1[MAX_FIXUPS]; int fixup_map2[MAX_FIXUPS]; + int fixup_map3[MAX_FIXUPS]; int fixup_prog[MAX_FIXUPS]; int fixup_map_in_map[MAX_FIXUPS]; const char *errstr; @@ -88,6 +89,11 @@ struct test_val { int foo[MAX_ENTRIES]; }; +struct other_val { + long long foo; + long long bar; +}; + static struct bpf_test tests[] = { { "add+sub+mul", @@ -5594,6 +5600,257 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { + "map lookup helper access to map", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 8 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map update helper access to map", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_update_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map update helper access to map: wrong size", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_update_elem), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .fixup_map3 = { 10 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=0 size=16", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const imm)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, + offsetof(struct other_val, bar)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 9 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const imm): out-of-bound 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + 
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, + sizeof(struct other_val) - 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 9 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=12 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const imm): out-of-bound 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 9 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=-4 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const reg)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, + offsetof(struct other_val, bar)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const reg): out-of-bound 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, + sizeof(struct other_val) - 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=12 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via const reg): out-of-bound 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, -4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=-4 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via variable)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + 
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, + offsetof(struct other_val, bar), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 11 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via variable): no max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 10 }, + .result = REJECT, + .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "map helper access to adjusted map (via variable): wrong max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, + offsetof(struct other_val, bar) + 1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map3 = { 3, 11 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=9 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { "map element value is preserved across register spilling", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -11533,6 +11790,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, { int *fixup_map1 = test->fixup_map1; int *fixup_map2 = test->fixup_map2; + int *fixup_map3 = test->fixup_map3; int *fixup_prog = test->fixup_prog; int *fixup_map_in_map = test->fixup_map_in_map; @@ -11556,6 +11814,14 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } while (*fixup_map2); } + if (*fixup_map3) { + map_fds[1] = create_map(sizeof(struct other_val), 1); + do { + prog[*fixup_map3].imm = map_fds[1]; + fixup_map3++; + } while (*fixup_map3); + } + if (*fixup_prog) { map_fds[2] = create_prog_array(); do { diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c612d6e38c62..f0e6c35a93ae 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -7,3 +7,7 @@ reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack reuseaddr_conflict +tcp_mmap +udpgso +udpgso_bench_rx +udpgso_bench_tx diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index daf5effec3f0..902820d3e848 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,13 +5,18 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh udpgso.sh 
+TEST_PROGS += udpgso_bench.sh TEST_GEN_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy +TEST_GEN_FILES += tcp_mmap tcp_inq TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict +TEST_GEN_PROGS += udpgso udpgso_bench_tx udpgso_bench_rx include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma +$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread +$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 75d922438bc9..d8313d0438b7 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -75,14 +76,31 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +learning() +{ + learning_test "br0" $swp1 $h1 $h2 +} + +flooding() +{ + flood_test $swp2 $h1 $h2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 192.0.2.2 -ping6_test $h1 2001:db8:1::2 -learning_test "br0" $swp1 $h1 $h2 -flood_test $swp2 $h1 $h2 +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index 1cddf06f691d..c15c6c85c984 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" NUM_NETIFS=4 source lib.sh @@ -73,14 +74,31 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +learning() +{ + learning_test "br0" $swp1 $h1 $h2 +} + +flooding() +{ + flood_test $swp2 $h1 $h2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 192.0.2.2 -ping6_test $h1 2001:db8:1::2 -learning_test "br0" $swp1 $h1 $h2 -flood_test $swp2 $h1 $h2 +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 1ac6c62271f3..91041c49655b 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -321,6 +321,25 @@ simple_if_fini() vrf_destroy $vrf_name } +tunnel_create() +{ + local name=$1; shift + local type=$1; shift + local local=$1; shift + local remote=$1; shift + + ip link add name $name type $type \ + local $local remote $remote "$@" + ip link set dev $name up +} + +tunnel_destroy() +{ + local name=$1; shift + + ip link del dev $name +} + master_name_get() { local if_name=$1 @@ -335,6 +354,15 @@ link_stats_tx_packets_get() ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' } +tc_rule_stats_get() +{ + local dev=$1; shift + local pref=$1; shift + + tc -j -s filter show dev $dev ingress pref $pref | + jq '.[1].options.actions[].stats.packets' +} + mac_get() { local if_name=$1 @@ -353,19 +381,33 @@ bridge_ageing_time_get() echo $((ageing_time / 100)) } -forwarding_enable() +declare -A SYSCTL_ORIG +sysctl_set() +{ + local key=$1; shift + local value=$1; shift + + SYSCTL_ORIG[$key]=$(sysctl -n $key) + sysctl -qw $key=$value +} + +sysctl_restore() { - 
ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
-	ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+	local key=$1; shift
 
-	sysctl -q -w net.ipv4.conf.all.forwarding=1
-	sysctl -q -w net.ipv6.conf.all.forwarding=1
+	sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+}
+
+forwarding_enable()
+{
+	sysctl_set net.ipv4.conf.all.forwarding 1
+	sysctl_set net.ipv6.conf.all.forwarding 1
 }
 
 forwarding_restore()
 {
-	sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
-	sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+	sysctl_restore net.ipv6.conf.all.forwarding
+	sysctl_restore net.ipv4.conf.all.forwarding
 }
 
 tc_offload_check()
@@ -381,6 +423,83 @@ tc_offload_check()
 	return 0
 }
 
+slow_path_trap_install()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+
+	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+		# For slow-path testing, install a trap so that packets that
+		# would otherwise be switched in HW take the slow path.
+		tc filter add dev $dev $direction pref 1 \
+		   flower skip_sw action trap
+	fi
+}
+
+slow_path_trap_uninstall()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+
+	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+		tc filter del dev $dev $direction pref 1 flower skip_sw
+	fi
+}
+
+__icmp_capture_add_del()
+{
+	local add_del=$1; shift
+	local pref=$1; shift
+	local vsuf=$1; shift
+	local tundev=$1; shift
+	local filter=$1; shift
+
+	tc filter $add_del dev "$tundev" ingress \
+	   proto ip$vsuf pref $pref \
+	   flower ip_proto icmp$vsuf $filter \
+	   action pass
+}
+
+icmp_capture_install()
+{
+	__icmp_capture_add_del add 100 "" "$@"
+}
+
+icmp_capture_uninstall()
+{
+	__icmp_capture_add_del del 100 "" "$@"
+}
+
+icmp6_capture_install()
+{
+	__icmp_capture_add_del add 100 v6 "$@"
+}
+
+icmp6_capture_uninstall()
+{
+	__icmp_capture_add_del del 100 v6 "$@"
+}
+
+matchall_sink_create()
+{
+	local dev=$1; shift
+
+	tc qdisc add dev $dev clsact
+	tc filter add dev $dev ingress \
+	   pref 10000 \
+	   matchall \
+	   action drop
+}
+
+tests_run()
+{
+	local current_test
+
+	for current_test in ${TESTS:-$ALL_TESTS}; do
+		$current_test
+	done
+}
+
 ##############################################################################
 # Tests
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755
index 000000000000..c6786d1b2b96
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the device to mirror to is a
+# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
+# and that another gretap / ip6gretap netdevice is then capable of
+# decapsulating the traffic. Test that the payload is what is expected (an
+# ICMP ping request or reply, depending on the test).
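+#
+# A note on running a subset: tests_run in lib.sh iterates over
+# ${TESTS:-$ALL_TESTS}, so individual cases can be selected from the
+# environment. A hypothetical invocation (names taken from ALL_TESTS below):
+#
+#   TESTS="test_gretap test_two_spans" ./mirror_gre.sh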
+ +ALL_TESTS=" + test_gretap + test_ip6gretap + test_gretap_mac + test_ip6gretap_mac + test_two_spans +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_mac() +{ + local tundev=$1; shift + local direction=$1; shift + local prot=$1; shift + local what=$1; shift + + local swp3mac=$(mac_get $swp3) + local h3mac=$(mac_get $h3) + + RET=0 + + mirror_install $swp1 $direction $tundev "matchall $tcflags" + tc qdisc add dev $h3 clsact + tc filter add dev $h3 ingress pref 77 prot $prot \ + flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \ + action pass + + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + + tc filter del dev $h3 ingress pref 77 + tc qdisc del dev $h3 clsact + mirror_uninstall $swp1 $direction + + log_test "$direction $what: envelope MAC ($tcflags)" +} + +test_two_spans() +{ + RET=0 + + mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 egress gt6 "matchall $tcflags" + quick_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt6 egress + + mirror_uninstall $swp1 ingress + fail_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt6 egress + + mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_uninstall $swp1 egress + quick_test_span_gre_dir gt4 ingress + fail_test_span_gre_dir gt6 egress + + mirror_uninstall $swp1 ingress + log_test "two simultaneously configured mirrors ($tcflags)" +} + +test_gretap() +{ + full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" + full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" +} + +test_ip6gretap() +{ + full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap" + full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" +} + +test_gretap_mac() +{ + test_span_gre_mac gt4 ingress ip "mirror to gretap" + test_span_gre_mac gt4 egress ip "mirror to gretap" +} + +test_ip6gretap_mac() +{ + test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap" + test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! 
tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755
index 000000000000..360ca133bead
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +---------------------+                             +---------------------+
+# | H1                  |                             |                  H2 |
+# |     + $h1           |                             |           $h2 +     |
+# |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+# +-----|---------------+                             +---------------|-----+
+#       |                                                             |
+# +-----|-------------------------------------------------------------|-----+
+# | SW  o--> mirror                                                   |     |
+# | +---|-------------------------------------------------------------|---+ |
+# | |   + $swp1                    BR                           $swp2 +   | |
+# | +---------------------------------------------------------------------+ |
+# |                                                                         |
+# | +---------------------------------------------------------------------+ |
+# | | OL                      + gt6 (ip6gretap)      + gt4 (gretap)       | |
+# | |                         : loc=2001:db8:2::1    : loc=192.0.2.129    | |
+# | |                         : rem=2001:db8:2::2    : rem=192.0.2.130    | |
+# | |                         : ttl=100              : ttl=100            | |
+# | |                         : tos=inherit          : tos=inherit        | |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# |                           :  |                   :  |                   |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# | | UL                      :  |,---------------------'                 | |
+# | |  + $swp3                :  ||                   :                   | |
+# | |  | 192.0.2.129/28       :  vv                   :                   | |
+# | |  | 2001:db8:2::1/64     :  + ul (dummy)         :                   | |
+# | +--|----------------------:----------------------:-------------------+ |
+# +----|----------------------:----------------------:---------------------+
+#      |                      :                      :
+# +----|----------------------:----------------------:---------------------+
+# | H3 + $h3                  + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)     |
+# |      192.0.2.130/28         loc=2001:db8:2::2      loc=192.0.2.130     |
+# |      2001:db8:2::2/64       rem=2001:db8:2::1      rem=192.0.2.129     |
+# |                             ttl=100                ttl=100             |
+# |                             tos=inherit            tos=inherit         |
+# |                                                                        |
+# +------------------------------------------------------------------------+
+#
+# This tests mirroring to gretap and ip6gretap configured in an overlay /
+# underlay manner, i.e. with a bound dummy device that marks the underlay
+# VRF where the encapsulated packets should be routed.

+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
+
+	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+	ip link set h3-gt4 vrf v$h3
+	matchall_sink_create h3-gt4
+
+	tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+	ip link set h3-gt6 vrf v$h3
+	matchall_sink_create h3-gt6
+}
+
+h3_destroy()
+{
+	tunnel_destroy h3-gt6
+	tunnel_destroy h3-gt4
+
+	simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+switch_create()
+{
+	# Bridge between H1 and H2.
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+
+	# Underlay.
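+	# (What makes this the underlay is the dummy device "ul" created
+	# below: it is enslaved to v$swp3 and the tunnels bind to it with
+	# "dev ul", which routes the encapsulated packets in that VRF.)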
+ + simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64 + + ip link add name ul type dummy + ip link set dev ul master v$swp3 + ip link set dev ul up + + # Overlay. + + vrf_create vrf-ol + ip link set dev vrf-ol up + + tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \ + ttl 100 tos inherit dev ul + ip link set dev gt4 master vrf-ol + ip link set dev gt4 up + + tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \ + ttl 100 tos inherit dev ul allow-localremote + ip link set dev gt6 master vrf-ol + ip link set dev gt6 up +} + +switch_destroy() +{ + vrf_destroy vrf-ol + + tunnel_destroy gt6 + tunnel_destroy gt4 + + simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64 + + ip link del dev ul + + tc qdisc del dev $swp1 clsact + + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + + h1_create + h2_create + h3_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +test_gretap() +{ + full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL" + full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap w/ UL" +} + +test_ip6gretap() +{ + full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL" + full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL" +} + +test_all() +{ + RET=0 + + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh new file mode 100755 index 000000000000..50ab3462af0c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -0,0 +1,212 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test how mirrors to gretap and ip6gretap react to changes to relevant +# configuration. + +ALL_TESTS=" + test_ttl + test_tun_up + test_egress_up + test_remote_ip +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + # This test downs $swp3, which deletes the configured IPv6 address + # unless this sysctl is set. 
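+	# (sysctl_set, from lib.sh, stashes the original value in SYSCTL_ORIG
+	# so that the sysctl_restore call in cleanup() can put it back.)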
+ sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1 + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_ttl() +{ + local tundev=$1; shift + local type=$1; shift + local prot=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + tc qdisc add dev $h3 clsact + tc filter add dev $h3 ingress pref 77 prot $prot \ + flower ip_ttl 50 action pass + + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0 + + ip link set dev $tundev type $type ttl 50 + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + + ip link set dev $tundev type $type ttl 100 + tc filter del dev $h3 ingress pref 77 + tc qdisc del dev $h3 clsact + mirror_uninstall $swp1 ingress + + log_test "$what: TTL change ($tcflags)" +} + +test_span_gre_tun_up() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $tundev down + mirror_install $swp1 ingress $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + + ip link set dev $tundev up + + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: tunnel down/up ($tcflags)" +} + +test_span_gre_egress_up() +{ + local tundev=$1; shift + local remote_ip=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $swp3 down + mirror_install $swp1 ingress $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + + # After setting the device up, wait for neighbor to get resolved so that + # we can expect mirroring to work. + ip link set dev $swp3 up + while true; do + ip neigh sh dev $swp3 $remote_ip nud reachable | + grep -q ^ + if [[ $? 
-ne 0 ]]; then + sleep 1 + else + break + fi + done + + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: egress down/up ($tcflags)" +} + +test_span_gre_remote_ip() +{ + local tundev=$1; shift + local type=$1; shift + local correct_ip=$1; shift + local wrong_ip=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $tundev type $type remote $wrong_ip + mirror_install $swp1 ingress $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + + ip link set dev $tundev type $type remote $correct_ip + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: remote address change ($tcflags)" +} + +test_ttl() +{ + test_span_gre_ttl gt4 gretap ip "mirror to gretap" + test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap" +} + +test_tun_up() +{ + test_span_gre_tun_up gt4 "mirror to gretap" + test_span_gre_tun_up gt6 "mirror to ip6gretap" +} + +test_egress_up() +{ + test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap" + test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap" +} + +test_remote_ip() +{ + test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap" + test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh new file mode 100755 index 000000000000..2e54407d8954 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The +# interfaces on H1 and H2 have two addresses each. Flower match on one of the +# addresses is configured with mirror action. It is expected that when pinging +# this address, mirroring takes place, whereas when pinging the other one, +# there's no mirroring. 
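+#
+# For reference, with mirror_install (mirror_lib.sh) the ingress gretap case
+# below expands to roughly the following rule ($tcflags elided; an
+# illustrative expansion, not a literal line from this patch):
+#
+#   tc filter add dev $swp1 ingress pref 1000 protocol ip flower \
+#      dst_ip 192.0.2.4 action mirred egress mirror dev gt4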
+ +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 + + ip address add dev $h1 192.0.2.3/28 + ip address add dev $h2 192.0.2.4/28 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h2 192.0.2.4/28 + ip address del dev $h1 192.0.2.3/28 + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_dir_acl() +{ + test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 +} + +full_test_span_gre_dir_acl() +{ + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local match_dip=$1; shift + local what=$1; shift + + mirror_install $swp1 $direction $tundev \ + "protocol ip flower $tcflags dst_ip $match_dip" + fail_test_span_gre_dir $tundev $direction + test_span_gre_dir_acl "$tundev" "$direction" \ + "$forward_type" "$backward_type" + mirror_uninstall $swp1 $direction + + log_test "$direction $what ($tcflags)" +} + +test_gretap() +{ + full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap" + full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap" +} + +test_ip6gretap() +{ + full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap" + full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap" +} + +test_all() +{ + RET=0 + + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! 
tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh new file mode 100644 index 000000000000..207ffd167dba --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -0,0 +1,85 @@ +# SPDX-License-Identifier: GPL-2.0 + +do_test_span_gre_dir_ips() +{ + local expect=$1; shift + local tundev=$1; shift + local direction=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + icmp_capture_install h3-$tundev + mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 $expect + mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 $expect + icmp_capture_uninstall h3-$tundev +} + +quick_test_span_gre_dir_ips() +{ + do_test_span_gre_dir_ips 10 "$@" +} + +fail_test_span_gre_dir_ips() +{ + do_test_span_gre_dir_ips 0 "$@" +} + +test_span_gre_dir_ips() +{ + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + quick_test_span_gre_dir_ips "$tundev" "$direction" "$ip1" "$ip2" + + icmp_capture_install h3-$tundev "type $forward_type" + mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 10 + icmp_capture_uninstall h3-$tundev + + icmp_capture_install h3-$tundev "type $backward_type" + mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 10 + icmp_capture_uninstall h3-$tundev +} + +full_test_span_gre_dir_ips() +{ + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + RET=0 + + mirror_install $swp1 $direction $tundev "matchall $tcflags" + test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \ + "$backward_type" "$ip1" "$ip2" + mirror_uninstall $swp1 $direction + + log_test "$direction $what ($tcflags)" +} + +quick_test_span_gre_dir() +{ + quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +fail_test_span_gre_dir() +{ + fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +test_span_gre_dir() +{ + test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +full_test_span_gre_dir() +{ + full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh new file mode 100755 index 000000000000..fc0508e40fca --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for +# the tunnel remote address has invalid address at the time that the mirroring +# is set up. Later on, the neighbor is deleted and it is expected to be +# reinitialized using the usual ARP process, and the mirroring offload updated. 
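+#
+# The invalid entry is planted with a made-up lladdr, e.g. for the IPv4
+# tunnel (see test_span_gre_neigh below):
+#
+#   ip neigh replace dev $swp3 192.0.2.130 lladdr 00:11:22:33:44:55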
+ +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_neigh() +{ + local addr=$1; shift + local tundev=$1; shift + local direction=$1; shift + local what=$1; shift + + RET=0 + + ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55 + mirror_install $swp1 $direction $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + ip neigh del dev $swp3 $addr + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 $direction + + log_test "$direction $what: neighbor change ($tcflags)" +} + +test_gretap() +{ + test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap" +} + +test_ip6gretap() +{ + test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap" + test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh new file mode 100755 index 000000000000..8fa681eb90e7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint +# is reachable through a next-hop route (as opposed to directly-attached route). + +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.$h3.rp_filter 0 + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.161/28 + ip address add dev $h3 192.0.2.162/28 + ip address add dev gt4 192.0.2.129/32 + ip address add dev h3-gt4 192.0.2.130/32 + + # IPv6 route can't be added after address. Such routes are rejected due + # to the gateway address having been configured on the local system. It + # works the other way around though. 
+ ip address add dev $swp3 2001:db8:4::1/64 + ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2 + ip address add dev $h3 2001:db8:4::2/64 + ip address add dev gt6 2001:db8:2::1 + ip address add dev h3-gt6 2001:db8:2::2 +} + +cleanup() +{ + pre_cleanup + + ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2 + ip address del dev $h3 2001:db8:4::2/64 + ip address del dev $swp3 2001:db8:4::1/64 + + ip address del dev $h3 192.0.2.162/28 + ip address del dev $swp3 192.0.2.161/28 + + mirror_gre_topo_destroy + vrf_cleanup + + sysctl_restore net.ipv4.conf.$h3.rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + +test_gretap() +{ + RET=0 + mirror_install $swp1 ingress gt4 "matchall $tcflags" + + # For IPv4, test that there's no mirroring without the route directing + # the traffic to tunnel remote address. Then add it and test that + # mirroring starts. For IPv6 we can't test this due to the limitation + # that routes for locally-specified IPv6 addresses can't be added. + fail_test_span_gre_dir gt4 ingress + + ip route add 192.0.2.130/32 via 192.0.2.162 + quick_test_span_gre_dir gt4 ingress + ip route del 192.0.2.130/32 via 192.0.2.162 + + mirror_uninstall $swp1 ingress + log_test "mirror to gre with next-hop remote ($tcflags)" +} + +test_ip6gretap() +{ + RET=0 + + mirror_install $swp1 ingress gt6 "matchall $tcflags" + quick_test_span_gre_dir gt6 ingress + mirror_uninstall $swp1 ingress + + log_test "mirror to ip6gre with next-hop remote ($tcflags)" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh new file mode 100644 index 000000000000..b3ceda2b4197 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This is the standard topology for testing mirroring to gretap and ip6gretap +# netdevices. The tests that use it tweak it in one way or another--importantly, +# $swp3 and $h3 need to have addresses set up. 
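+#
+# For example, a typical tweak (this is what mirror_gre.sh does in its
+# setup_prepare) is:
+#
+#   ip address add dev $swp3 192.0.2.129/28
+#   ip address add dev $h3 192.0.2.130/28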
+#
+# +---------------------+                             +---------------------+
+# | H1                  |                             |                  H2 |
+# |     + $h1           |                             |           $h2 +     |
+# |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+# +-----|---------------+                             +---------------|-----+
+#       |                                                             |
+# +-----|-------------------------------------------------------------|-----+
+# | SW  o--> mirror                                                   |     |
+# | +---|-------------------------------------------------------------|---+ |
+# | |   + $swp1                    BR                           $swp2 +   | |
+# | +---------------------------------------------------------------------+ |
+# |                                                                         |
+# |     + $swp3               + gt6 (ip6gretap)      + gt4 (gretap)         |
+# |     |                     : loc=2001:db8:2::1    : loc=192.0.2.129      |
+# |     |                     : rem=2001:db8:2::2    : rem=192.0.2.130      |
+# |     |                     : ttl=100              : ttl=100              |
+# |     |                     : tos=inherit          : tos=inherit          |
+# |     |                     :                      :                      |
+# +-----|---------------------:----------------------:----------------------+
+#       |                     :                      :
+# +-----|---------------------:----------------------:----------------------+
+# | H3  + $h3                 + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)      |
+# |                             loc=2001:db8:2::2      loc=192.0.2.130      |
+# |                             rem=2001:db8:2::1      rem=192.0.2.129      |
+# |                             ttl=100                ttl=100              |
+# |                             tos=inherit            tos=inherit          |
+# |                                                                         |
+# +-------------------------------------------------------------------------+
+
+mirror_gre_topo_h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_gre_topo_h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_gre_topo_h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_gre_topo_h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_gre_topo_h3_create()
+{
+	simple_if_init $h3
+
+	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+	ip link set h3-gt4 vrf v$h3
+	matchall_sink_create h3-gt4
+
+	tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+	ip link set h3-gt6 vrf v$h3
+	matchall_sink_create h3-gt6
+}
+
+mirror_gre_topo_h3_destroy()
+{
+	tunnel_destroy h3-gt6
+	tunnel_destroy h3-gt4
+
+	simple_if_fini $h3
+}
+
+mirror_gre_topo_switch_create()
+{
+	ip link set dev $swp3 up
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit
+
+	tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+		      ttl 100 tos inherit allow-localremote
+
+	tc qdisc add dev $swp1 clsact
+}
+
+mirror_gre_topo_switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+
+	tunnel_destroy gt6
+	tunnel_destroy gt4
+
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link del dev br1
+
+	ip link set dev $swp3 down
+}
+
+mirror_gre_topo_create()
+{
+	mirror_gre_topo_h1_create
+	mirror_gre_topo_h2_create
+	mirror_gre_topo_h3_create
+
+	mirror_gre_topo_switch_create
+}
+
+mirror_gre_topo_destroy()
+{
+	mirror_gre_topo_switch_destroy
+
+	mirror_gre_topo_h3_destroy
+	mirror_gre_topo_h2_destroy
+	mirror_gre_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644
index 000000000000..e5028a5725e3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+
+mirror_install()
+{
+	local from_dev=$1; shift
+	local direction=$1; shift
+	local to_dev=$1; shift
+	local filter=$1; shift
+
+	tc filter add dev $from_dev $direction \
+	   pref 1000 $filter \
+	   action mirred egress mirror dev $to_dev
+}
+
+mirror_uninstall()
+{
+	local from_dev=$1; shift
+	local direction=$1; shift
+
+	tc filter del dev $from_dev $direction pref 1000
+}
+
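+# Example usage, following the mirror_gre tests (gt4 is a caller-created
+# gretap netdevice, h3-gt4 its receiving peer with an icmp_capture rule
+# installed at pref 100):
+#
+#   mirror_install $swp1 ingress gt4 "matchall $tcflags"
+#   mirror_test v$h1 192.0.2.1 192.0.2.2 h3-gt4 100 10
+#   mirror_uninstall $swp1 ingress
+#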
+mirror_test() +{ + local vrf_name=$1; shift + local sip=$1; shift + local dip=$1; shift + local dev=$1; shift + local pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $dev $pref) + ip vrf exec $vrf_name \ + ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null + local t1=$(tc_rule_stats_get $dev $pref) + local delta=$((t1 - t0)) + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index cc6a14abfa87..a75cb51cc5bd 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6" NUM_NETIFS=4 source lib.sh @@ -114,12 +115,21 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 198.51.100.2 -ping6_test $h1 2001:db8:2::2 +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 3bc351008db6..8b6d0fb6d604 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test" NUM_NETIFS=8 source lib.sh @@ -191,7 +192,7 @@ multipath_eval() diff=$(echo $weights_ratio - $packets_ratio | bc -l) diff=${diff#-} - test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0 + test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0 check_err $? "Too large discrepancy between expected and measured ratios" log_test "$desc" log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" @@ -204,13 +205,11 @@ multipath4_test() local weight_rp13=$3 local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 - local hash_policy # Transmit multiple flows from h1 to h2 and make sure they are # distributed between both multipath links (rp12 and rp13) # according to the configured weights. - hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) - sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + sysctl_set net.ipv4.fib_multipath_hash_policy 1 ip route replace 198.51.100.0/24 vrf vrf-r1 \ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13 @@ -232,7 +231,7 @@ multipath4_test() ip route replace 198.51.100.0/24 vrf vrf-r1 \ nexthop via 169.254.2.22 dev $rp12 \ nexthop via 169.254.3.23 dev $rp13 - sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy + sysctl_restore net.ipv4.fib_multipath_hash_policy } multipath6_l4_test() @@ -242,13 +241,11 @@ multipath6_l4_test() local weight_rp13=$3 local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 - local hash_policy # Transmit multiple flows from h1 to h2 and make sure they are # distributed between both multipath links (rp12 and rp13) # according to the configured weights. 
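+	# (Hash policy 1 selects layer-4, five-tuple hashing, which is what
+	# lets distinct flows land on different nexthops in the first place.)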
- hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy) - sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + sysctl_set net.ipv6.fib_multipath_hash_policy 1 ip route replace 2001:db8:2::/64 vrf vrf-r1 \ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ @@ -271,7 +268,7 @@ multipath6_l4_test() nexthop via fe80:2::22 dev $rp12 \ nexthop via fe80:3::23 dev $rp13 - sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy + sysctl_restore net.ipv6.fib_multipath_hash_policy } multipath6_test() @@ -364,13 +361,21 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 198.51.100.2 -ping6_test $h1 2001:db8:2::2 -multipath_test +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 3a6385ebd5d0..813d02d1939d 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ + mirred_egress_mirror_test gact_trap_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -111,6 +113,10 @@ gact_trap_test() { RET=0 + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ skip_hw dst_ip 192.0.2.2 action drop tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \ @@ -179,24 +185,29 @@ cleanup() ip link set $swp1 address $swp1origmac } +mirred_egress_redirect_test() +{ + mirred_egress_test "redirect" +} + +mirred_egress_mirror_test() +{ + mirred_egress_test "mirror" +} + trap cleanup EXIT setup_prepare setup_wait -gact_drop_and_ok_test -mirred_egress_test "redirect" -mirred_egress_test "mirror" +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - gact_drop_and_ok_test - mirred_egress_test "redirect" - mirred_egress_test "mirror" - gact_trap_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh index 2fd15226974b..d2c783e94df3 100755 --- a/tools/testing/selftests/net/forwarding/tc_chains.sh +++ b/tools/testing/selftests/net/forwarding/tc_chains.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="unreachable_chain_test gact_goto_chain_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -107,16 +108,14 @@ trap cleanup EXIT setup_prepare setup_wait -unreachable_chain_test -gact_goto_chain_test +tests_run tc_offload_check if [[ $? 
-ne 0 ]]; then
 	log_info "Could not test offloaded functionality"
 else
 	tcflags="skip_sw"
-	unreachable_chain_test
-	gact_goto_chain_test
+	tests_run
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 032b882adfc0..20d1077e5a3d 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
+	match_src_ip_test match_ip_flags_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -149,6 +151,74 @@ match_src_ip_test()
 	log_test "src_ip match ($tcflags)"
 }
 
+match_ip_flags_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags ip_flags frag action continue
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags ip_flags firstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags ip_flags nofirstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+		$tcflags ip_flags nofrag action drop
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on wrong frag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on nofirstfrag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Did not match on nofrag filter (nofrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0,mf" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on frag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on firstfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on wrong nofrag filter (1stfrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256,mf" -q
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256" -q
+
+	tc_check_packets "dev $h2 ingress" 101 3
+	check_err $? "Did not match on frag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 3
+	check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on nofrag filter (no1stfrag)"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+	log_test "ip_flags match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -177,20 +247,14 @@ trap cleanup EXIT
 setup_prepare
 setup_wait
 
-match_dst_mac_test
-match_src_mac_test
-match_dst_ip_test
-match_src_ip_test
+tests_run
 
 tc_offload_check
 if [[ $?
-ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - match_dst_mac_test - match_src_mac_test - match_dst_ip_test - match_src_ip_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh index 077b98048ef4..b5b917203815 100755 --- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh +++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="shared_block_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -109,14 +110,14 @@ trap cleanup EXIT setup_prepare setup_wait -shared_block_test +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - shared_block_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 1e428781a625..7651fd4d86fe 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -368,7 +368,7 @@ test_pmtu_vti6_link_add_mtu() { fail=0 - min=1280 + min=68 # vti6 can carry IPv4 packets too max=$((65535 - 40)) # Check invalid values first for v in $((min - 1)) $((max + 1)); do @@ -384,7 +384,7 @@ test_pmtu_vti6_link_add_mtu() { done # Now check valid values - for v in 1280 1300 $((65535 - 40)); do + for v in 68 1280 1300 $((65535 - 40)); do ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 mtu="$(link_get_mtu "${ns_a}" vti6_a)" ${ns_a} ip link del vti6_a diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c new file mode 100644 index 000000000000..d044b29ddabc --- /dev/null +++ b/tools/testing/selftests/net/tcp_inq.c @@ -0,0 +1,189 @@ +/* + * Copyright 2018 Google Inc. + * Author: Soheil Hassas Yeganeh (soheil@google.com) + * + * Simple example on how to use TCP_INQ and TCP_CM_INQ. + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. 
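+ *
+ * Usage (per the option parsing in main() below; the defaults are IPv6 and
+ * port 4974, connecting over loopback):
+ *
+ *   tcp_inq [-4|-6] [-p port]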
+ */ +#define _GNU_SOURCE + +#include <error.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <pthread.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> + +#ifndef TCP_INQ +#define TCP_INQ 36 +#endif + +#ifndef TCP_CM_INQ +#define TCP_CM_INQ TCP_INQ +#endif + +#define BUF_SIZE 8192 +#define CMSG_SIZE 32 + +static int family = AF_INET6; +static socklen_t addr_len = sizeof(struct sockaddr_in6); +static int port = 4974; + +static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (family) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr4->sin_port = htons(port); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_loopback; + addr6->sin6_port = htons(port); + break; + default: + error(1, 0, "illegal family"); + } +} + +void *start_server(void *arg) +{ + int server_fd = (int)(unsigned long)arg; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + char *buf; + int fd; + int r; + + buf = malloc(BUF_SIZE); + + for (;;) { + fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen); + if (fd == -1) { + perror("accept"); + break; + } + do { + r = send(fd, buf, BUF_SIZE, 0); + } while (r < 0 && errno == EINTR); + if (r < 0) + perror("send"); + if (r != BUF_SIZE) + fprintf(stderr, "can only send %d bytes\n", r); + /* TCP_INQ can overestimate in-queue by one byte if we send + * the FIN packet. Sleep for 1 second, so that the client + * likely invoked recvmsg(). 
+ */ + sleep(1); + close(fd); + } + + free(buf); + close(server_fd); + pthread_exit(0); +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage listen_addr, addr; + int c, one = 1, inq = -1; + pthread_t server_thread; + char cmsgbuf[CMSG_SIZE]; + struct iovec iov[1]; + struct cmsghdr *cm; + struct msghdr msg; + int server_fd, fd; + char *buf; + + while ((c = getopt(argc, argv, "46p:")) != -1) { + switch (c) { + case '4': + family = PF_INET; + addr_len = sizeof(struct sockaddr_in); + break; + case '6': + family = PF_INET6; + addr_len = sizeof(struct sockaddr_in6); + break; + case 'p': + port = atoi(optarg); + break; + } + } + + server_fd = socket(family, SOCK_STREAM, 0); + if (server_fd < 0) + error(1, errno, "server socket"); + setup_loopback_addr(family, &listen_addr); + if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, + &one, sizeof(one)) != 0) + error(1, errno, "setsockopt(SO_REUSEADDR)"); + if (bind(server_fd, (const struct sockaddr *)&listen_addr, + addr_len) == -1) + error(1, errno, "bind"); + if (listen(server_fd, 128) == -1) + error(1, errno, "listen"); + if (pthread_create(&server_thread, NULL, start_server, + (void *)(unsigned long)server_fd) != 0) + error(1, errno, "pthread_create"); + + fd = socket(family, SOCK_STREAM, 0); + if (fd < 0) + error(1, errno, "client socket"); + setup_loopback_addr(family, &addr); + if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1) + error(1, errno, "connect"); + if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0) + error(1, errno, "setsockopt(TCP_INQ)"); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + msg.msg_flags = 0; + + buf = malloc(BUF_SIZE); + iov[0].iov_base = buf; + iov[0].iov_len = BUF_SIZE / 2; + + if (recvmsg(fd, &msg, 0) != iov[0].iov_len) + error(1, errno, "recvmsg"); + if (msg.msg_flags & MSG_CTRUNC) + error(1, 0, "control message is truncated"); + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) + if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ) + inq = *((int *) CMSG_DATA(cm)); + + if (inq != BUF_SIZE - iov[0].iov_len) { + fprintf(stderr, "unexpected inq: %d\n", inq); + exit(1); + } + + printf("PASSED\n"); + free(buf); + close(fd); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c new file mode 100644 index 000000000000..77f762780199 --- /dev/null +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -0,0 +1,447 @@ +/* + * Copyright 2018 Google Inc. + * Author: Eric Dumazet (edumazet@google.com) + * + * Reference program demonstrating tcp mmap() usage, + * and SO_RCVLOWAT hints for receiver. + * + * Note : NIC with header split is needed to use mmap() on TCP : + * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload. + * + * How to use on loopback interface : + * + * ifconfig lo mtu 61512 # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header) + * tcp_mmap -s -z & + * tcp_mmap -H ::1 -z + * + * Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12) + * (4096 : page size on x86, 12: TCP TS option length) + * tcp_mmap -s -z -M $((4096+12)) & + * tcp_mmap -H ::1 -z -M $((4096+12)) + * + * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface. 
+ * We might use sendfile() instead, but really this test program is about mmap(), for receivers ;) + * + * $ ./tcp_mmap -s & # Without mmap() + * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done + * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit + * cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches + * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit + * cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches + * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit + * cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches + * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit + * cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches + * $ kill %1 # kill tcp_mmap server + * + * $ ./tcp_mmap -s -z & # With mmap() + * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done + * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit + * cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches + * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit + * cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches + * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit + * cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches + * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit + * cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#define _GNU_SOURCE +#include <pthread.h> +#include <sys/types.h> +#include <fcntl.h> +#include <error.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <unistd.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <time.h> +#include <sys/time.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <poll.h> +#include <linux/tcp.h> +#include <assert.h> + +#ifndef MSG_ZEROCOPY +#define MSG_ZEROCOPY 0x4000000 +#endif + +#define FILE_SZ (1UL << 35) +static int cfg_family = AF_INET6; +static socklen_t cfg_alen = sizeof(struct sockaddr_in6); +static int cfg_port = 8787; + +static int rcvbuf; /* Default: autotuning. Can be set with -r <integer> option */ +static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option */ +static int zflg; /* zero copy option. 
(MSG_ZEROCOPY for sender, mmap() for receiver */ +static int xflg; /* hash received data (simple xor) (-h option) */ +static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */ + +static int chunk_size = 512*1024; + +unsigned long htotal; + +static inline void prefetch(const void *x) +{ +#if defined(__x86_64__) + asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x)); +#endif +} + +void hash_zone(void *zone, unsigned int length) +{ + unsigned long temp = htotal; + + while (length >= 8*sizeof(long)) { + prefetch(zone + 384); + temp ^= *(unsigned long *)zone; + temp ^= *(unsigned long *)(zone + sizeof(long)); + temp ^= *(unsigned long *)(zone + 2*sizeof(long)); + temp ^= *(unsigned long *)(zone + 3*sizeof(long)); + temp ^= *(unsigned long *)(zone + 4*sizeof(long)); + temp ^= *(unsigned long *)(zone + 5*sizeof(long)); + temp ^= *(unsigned long *)(zone + 6*sizeof(long)); + temp ^= *(unsigned long *)(zone + 7*sizeof(long)); + zone += 8*sizeof(long); + length -= 8*sizeof(long); + } + while (length >= 1) { + temp ^= *(unsigned char *)zone; + zone += 1; + length--; + } + htotal = temp; +} + +void *child_thread(void *arg) +{ + unsigned long total_mmap = 0, total = 0; + struct tcp_zerocopy_receive zc; + unsigned long delta_usec; + int flags = MAP_SHARED; + struct timeval t0, t1; + char *buffer = NULL; + void *addr = NULL; + double throughput; + struct rusage ru; + int lu, fd; + + fd = (int)(unsigned long)arg; + + gettimeofday(&t0, NULL); + + fcntl(fd, F_SETFL, O_NDELAY); + buffer = malloc(chunk_size); + if (!buffer) { + perror("malloc"); + goto error; + } + if (zflg) { + addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0); + if (addr == (void *)-1) + zflg = 0; + } + while (1) { + struct pollfd pfd = { .fd = fd, .events = POLLIN, }; + int sub; + + poll(&pfd, 1, 10000); + if (zflg) { + socklen_t zc_len = sizeof(zc); + int res; + + zc.address = (__u64)addr; + zc.length = chunk_size; + zc.recv_skip_hint = 0; + res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, + &zc, &zc_len); + if (res == -1) + break; + + if (zc.length) { + assert(zc.length <= chunk_size); + total_mmap += zc.length; + if (xflg) + hash_zone(addr, zc.length); + total += zc.length; + } + if (zc.recv_skip_hint) { + assert(zc.recv_skip_hint <= chunk_size); + lu = read(fd, buffer, zc.recv_skip_hint); + if (lu > 0) { + if (xflg) + hash_zone(buffer, lu); + total += lu; + } + } + continue; + } + sub = 0; + while (sub < chunk_size) { + lu = read(fd, buffer + sub, chunk_size - sub); + if (lu == 0) + goto end; + if (lu < 0) + break; + if (xflg) + hash_zone(buffer + sub, lu); + total += lu; + sub += lu; + } + } +end: + gettimeofday(&t1, NULL); + delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec; + + throughput = 0; + if (delta_usec) + throughput = total * 8.0 / (double)delta_usec / 1000.0; + getrusage(RUSAGE_THREAD, &ru); + if (total > 1024*1024) { + unsigned long total_usec; + unsigned long mb = total >> 20; + total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec + + 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec; + printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n" + " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n", + total / (1024.0 * 1024.0), + 100.0*total_mmap/total, + (double)delta_usec / 1000000.0, + throughput, + (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0, + (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0, + (double)total_usec/mb, + ru.ru_nvcsw); + } +error: + free(buffer); + 
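+	/* Connection teardown: the read() buffer was freed above; close
+	 * the socket and, if the zero-copy window was mapped, unmap it. */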
close(fd); + if (zflg) + munmap(addr, chunk_size); + pthread_exit(0); +} + +static void apply_rcvsnd_buf(int fd) +{ + if (rcvbuf && setsockopt(fd, SOL_SOCKET, + SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) { + perror("setsockopt SO_RCVBUF"); + } + + if (sndbuf && setsockopt(fd, SOL_SOCKET, + SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) { + perror("setsockopt SO_SNDBUF"); + } +} + + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + +static void do_accept(int fdlisten) +{ + if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT, + &chunk_size, sizeof(chunk_size)) == -1) { + perror("setsockopt SO_RCVLOWAT"); + } + + apply_rcvsnd_buf(fdlisten); + + while (1) { + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + pthread_t th; + int fd, res; + + fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen); + if (fd == -1) { + perror("accept"); + continue; + } + res = pthread_create(&th, NULL, child_thread, + (void *)(unsigned long)fd); + if (res) { + errno = res; + perror("pthread_create"); + close(fd); + } + } +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage listenaddr, addr; + unsigned int max_pacing_rate = 0; + unsigned long total = 0; + char *host = NULL; + int fd, c, on = 1; + char *buffer; + int sflg = 0; + int mss = 0; + + while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) { + switch (c) { + case '4': + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'p': + cfg_port = atoi(optarg); + break; + case 'H': + host = optarg; + break; + case 's': /* server : listen for incoming connections */ + sflg++; + break; + case 'r': + rcvbuf = atoi(optarg); + break; + case 'w': + sndbuf = atoi(optarg); + break; + case 'z': + zflg = 1; + break; + case 'M': + mss = atoi(optarg); + break; + case 'x': + xflg = 1; + break; + case 'k': + keepflag = 1; + break; + case 'P': + max_pacing_rate = atoi(optarg) ; + break; + default: + exit(1); + } + } + if (sflg) { + int fdlisten = socket(cfg_family, SOCK_STREAM, 0); + + if (fdlisten == -1) { + perror("socket"); + exit(1); + } + apply_rcvsnd_buf(fdlisten); + setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); + + setup_sockaddr(cfg_family, host, &listenaddr); + + if (mss && + setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG, + &mss, sizeof(mss)) == -1) { + perror("setsockopt TCP_MAXSEG"); + exit(1); + } + if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) { + perror("bind"); + exit(1); + } + if (listen(fdlisten, 128) == -1) { + perror("listen"); + exit(1); + } + do_accept(fdlisten); + } + buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buffer == (char *)-1) { + perror("mmap"); + exit(1); + } + + fd = 
socket(cfg_family, SOCK_STREAM, 0);
+	if (fd == -1) {
+		perror("socket");
+		exit(1);
+	}
+	apply_rcvsnd_buf(fd);
+
+	setup_sockaddr(cfg_family, host, &addr);
+
+	if (mss &&
+	    setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+		perror("setsockopt TCP_MAXSEG");
+		exit(1);
+	}
+	if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
+		perror("connect");
+		exit(1);
+	}
+	if (max_pacing_rate &&
+	    setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
+		       &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
+		perror("setsockopt SO_MAX_PACING_RATE");
+
+	if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+			       &on, sizeof(on)) == -1) {
+		perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
+		zflg = 0;
+	}
+	while (total < FILE_SZ) {
+		long wr = FILE_SZ - total;
+
+		if (wr > chunk_size)
+			wr = chunk_size;
+		/* Note : we just want to fill the pipe with 0 bytes */
+		wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+		if (wr <= 0)
+			break;
+		total += wr;
+	}
+	close(fd);
+	munmap(buffer, chunk_size);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644
index 000000000000..48a0592db938
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <net/if.h>
+#include <linux/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#define CONST_MTU_TEST 1500
+
+#define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr))
+#define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr))
+
+#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4)
+#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6)
+
+#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4)
+#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6)
+
+static bool cfg_do_ipv4;
+static bool cfg_do_ipv6;
+static bool cfg_do_connected;
+static bool cfg_do_connectionless;
+static bool cfg_do_msgmore;
+static bool cfg_do_setsockopt;
+static int cfg_specific_test_id = -1;
+
+static const char cfg_ifname[] = "lo";
+static unsigned short cfg_port = 9000;
+
+static char buf[ETH_MAX_MTU];
+
+struct testcase {
+	int tlen;		/* send() buffer size, may exceed mss */
+	bool tfail;		/* send() call is expected to fail */
+	int gso_len;		/* mss after applying gso */
+	int r_num_mss;		/* recv(): number of calls of full mss */
+	int r_len_last;		/* recv(): size of last non-mss dgram, if any */
+};
+
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+
+struct testcase testcases_v4[] = {
+	{
+		/* no GSO: send a single byte */
+		.tlen = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* no GSO: send a single MSS */
+		.tlen = CONST_MSS_V4,
+		.r_num_mss = 1,
+	},
+	{
+		/* no GSO: send a single MSS + 1B: fail */
+		.tlen = CONST_MSS_V4 + 1,
+		.tfail = true,
+	},
+	{
+		/* send a single MSS: will fail with GSO, because the segment
+		 * logic in udp4_ufo_fragment
demands a gso skb to be > MTU + */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send a single MSS + 1B */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4, + .r_num_mss = 1, + .r_len_last = 1, + }, + { + /* send exactly 2 MSS */ + .tlen = CONST_MSS_V4 * 2, + .gso_len = CONST_MSS_V4, + .r_num_mss = 2, + }, + { + /* send 2 MSS + 1B */ + .tlen = (CONST_MSS_V4 * 2) + 1, + .gso_len = CONST_MSS_V4, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send MAX segs */ + .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4, + .gso_len = CONST_MSS_V4, + .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4), + }, + + { + /* send MAX bytes */ + .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4, + .gso_len = CONST_MSS_V4, + .r_num_mss = CONST_MAX_SEGS_V4, + .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 - + (CONST_MAX_SEGS_V4 * CONST_MSS_V4), + }, + { + /* send MAX + 1: fail */ + .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1, + .gso_len = CONST_MSS_V4, + .tfail = true, + }, + { + /* EOL */ + } +}; + +#ifndef IP6_MAX_MTU +#define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr)) +#endif + +struct testcase testcases_v6[] = { + { + /* no GSO: send a single byte */ + .tlen = 1, + .r_len_last = 1, + }, + { + /* no GSO: send a single MSS */ + .tlen = CONST_MSS_V6, + .r_num_mss = 1, + }, + { + /* no GSO: send a single MSS + 1B: fail */ + .tlen = CONST_MSS_V6 + 1, + .tfail = true, + }, + { + /* send a single MSS: will fail with GSO, because the segment + * logic in udp4_ufo_fragment demands a gso skb to be > MTU + */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send a single MSS + 1B */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6, + .r_num_mss = 1, + .r_len_last = 1, + }, + { + /* send exactly 2 MSS */ + .tlen = CONST_MSS_V6 * 2, + .gso_len = CONST_MSS_V6, + .r_num_mss = 2, + }, + { + /* send 2 MSS + 1B */ + .tlen = (CONST_MSS_V6 * 2) + 1, + .gso_len = CONST_MSS_V6, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send MAX segs */ + .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6, + .gso_len = CONST_MSS_V6, + .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6), + }, + + { + /* send MAX bytes */ + .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6, + .gso_len = CONST_MSS_V6, + .r_num_mss = CONST_MAX_SEGS_V6, + .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 - + (CONST_MAX_SEGS_V6 * CONST_MSS_V6), + }, + { + /* send MAX + 1: fail */ + .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1, + .gso_len = CONST_MSS_V6, + .tfail = true, + }, + { + /* EOL */ + } +}; + +static unsigned int get_device_mtu(int fd, const char *ifname) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + strcpy(ifr.ifr_name, ifname); + + if (ioctl(fd, SIOCGIFMTU, &ifr)) + error(1, errno, "ioctl get mtu"); + + return ifr.ifr_mtu; +} + +static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_mtu = mtu; + strcpy(ifr.ifr_name, ifname); + + if (ioctl(fd, SIOCSIFMTU, &ifr)) + error(1, errno, "ioctl set mtu"); +} + +static void set_device_mtu(int fd, int mtu) +{ + int val; + + val = get_device_mtu(fd, cfg_ifname); + fprintf(stderr, "device mtu (orig): %u\n", val); + + __set_device_mtu(fd, cfg_ifname, mtu); + val = get_device_mtu(fd, cfg_ifname); + if (val != mtu) + error(1, 0, "unable to set device mtu to %u\n", val); + + fprintf(stderr, "device mtu (test): %u\n", val); +} + +static void set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + 
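+		/* IP_PMTUDISC_DO sets the DF bit: datagrams larger than the
+		 * MTU now fail with EMSGSIZE instead of being fragmented,
+		 * which is how the testcases detect oversized sends. */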
name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + if (setsockopt(fd, level, name, &val, sizeof(val))) + error(1, errno, "setsockopt path mtu"); +} + +static unsigned int get_path_mtu(int fd, bool is_ipv4) +{ + socklen_t vallen; + unsigned int mtu; + int ret; + + vallen = sizeof(mtu); + if (is_ipv4) + ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen); + else + ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen); + + if (ret) + error(1, errno, "getsockopt mtu"); + + + fprintf(stderr, "path mtu (read): %u\n", mtu); + return mtu; +} + +/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */ +static void set_route_mtu(int mtu, bool is_ipv4) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct nlmsghdr *nh; + struct rtattr *rta; + struct rtmsg *rt; + char data[NLMSG_ALIGN(sizeof(*nh)) + + NLMSG_ALIGN(sizeof(*rt)) + + NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) + + NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) + + NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))]; + int fd, ret, alen, off = 0; + + alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) + error(1, errno, "socket netlink"); + + memset(data, 0, sizeof(data)); + + nh = (void *)data; + nh->nlmsg_type = RTM_NEWROUTE; + nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; + off += NLMSG_ALIGN(sizeof(*nh)); + + rt = (void *)(data + off); + rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6; + rt->rtm_table = RT_TABLE_MAIN; + rt->rtm_dst_len = alen << 3; + rt->rtm_protocol = RTPROT_BOOT; + rt->rtm_scope = RT_SCOPE_UNIVERSE; + rt->rtm_type = RTN_UNICAST; + off += NLMSG_ALIGN(sizeof(*rt)); + + rta = (void *)(data + off); + rta->rta_type = RTA_DST; + rta->rta_len = RTA_LENGTH(alen); + if (is_ipv4) + memcpy(RTA_DATA(rta), &addr4, alen); + else + memcpy(RTA_DATA(rta), &addr6, alen); + off += NLMSG_ALIGN(rta->rta_len); + + rta = (void *)(data + off); + rta->rta_type = RTA_OIF; + rta->rta_len = RTA_LENGTH(sizeof(int)); + *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo"); + off += NLMSG_ALIGN(rta->rta_len); + + /* MTU is a subtype in a metrics type */ + rta = (void *)(data + off); + rta->rta_type = RTA_METRICS; + rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)); + off += NLMSG_ALIGN(rta->rta_len); + + /* now fill MTU subtype. 
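RTAX_MTU nests inside the RTA_METRICS payload reserved just above, which is why the pointer below advances by only RTA_LENGTH(0), the header size of the enclosing attribute, before being reused.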
Note that it fits within above rta_len */ + rta = (void *)(((char *) rta) + RTA_LENGTH(0)); + rta->rta_type = RTAX_MTU; + rta->rta_len = RTA_LENGTH(sizeof(int)); + *((int *)(RTA_DATA(rta))) = mtu; + + nh->nlmsg_len = off; + + ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr)); + if (ret != off) + error(1, errno, "send netlink: %uB != %uB\n", ret, off); + + if (close(fd)) + error(1, errno, "close netlink"); + + fprintf(stderr, "route mtu (test): %u\n", mtu); +} + +static bool __send_one(int fd, struct msghdr *msg, int flags) +{ + int ret; + + ret = sendmsg(fd, msg, flags); + if (ret == -1 && (errno == EMSGSIZE || errno == ENOMEM)) + return false; + if (ret == -1) + error(1, errno, "sendmsg"); + if (ret != msg->msg_iov->iov_len) + error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len); + if (msg->msg_flags) + error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags); + + return true; +} + +static bool send_one(int fd, int len, int gso_len, + struct sockaddr *addr, socklen_t alen) +{ + char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cm; + + iov.iov_base = buf; + iov.iov_len = len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_name = addr; + msg.msg_namelen = alen; + + if (gso_len && !cfg_do_setsockopt) { + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_level = SOL_UDP; + cm->cmsg_type = UDP_SEGMENT; + cm->cmsg_len = CMSG_LEN(sizeof(uint16_t)); + *((uint16_t *) CMSG_DATA(cm)) = gso_len; + } + + /* If MSG_MORE, send 1 byte followed by remainder */ + if (cfg_do_msgmore && len > 1) { + iov.iov_len = 1; + if (!__send_one(fd, &msg, MSG_MORE)) + error(1, 0, "send 1B failed"); + + iov.iov_base++; + iov.iov_len = len - 1; + } + + return __send_one(fd, &msg, 0); +} + +static int recv_one(int fd, int flags) +{ + int ret; + + ret = recv(fd, buf, sizeof(buf), flags); + if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT)) + return 0; + if (ret == -1) + error(1, errno, "recv"); + + return ret; +} + +static void run_one(struct testcase *test, int fdt, int fdr, + struct sockaddr *addr, socklen_t alen) +{ + int i, ret, val, mss; + bool sent; + + fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + addr->sa_family == AF_INET ? 4 : 6, + test->tlen, test->gso_len, + test->tfail ? "(fail)" : ""); + + val = test->gso_len; + if (cfg_do_setsockopt) { + if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) + error(1, errno, "setsockopt udp segment"); + } + + sent = send_one(fdt, test->tlen, test->gso_len, addr, alen); + if (sent && test->tfail) + error(1, 0, "send succeeded while expecting failure"); + if (!sent && !test->tfail) + error(1, 0, "send failed while expecting success"); + if (!sent) + return; + + mss = addr->sa_family == AF_INET ? 
CONST_MSS_V4 : CONST_MSS_V6; + + /* Recv all full MSS datagrams */ + for (i = 0; i < test->r_num_mss; i++) { + ret = recv_one(fdr, 0); + if (ret != mss) + error(1, 0, "recv.%d: %d != %d", i, ret, mss); + } + + /* Recv the non-full last datagram, if tlen was not a multiple of mss */ + if (test->r_len_last) { + ret = recv_one(fdr, 0); + if (ret != test->r_len_last) + error(1, 0, "recv.%d: %d != %d (last)", + i, ret, test->r_len_last); + } + + /* Verify received all data */ + ret = recv_one(fdr, MSG_DONTWAIT); + if (ret) + error(1, 0, "recv: unexpected datagram"); +} + +static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen) +{ + struct testcase *tests, *test; + + tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6; + + for (test = tests; test->tlen; test++) { + /* if a specific test is given, then skip all others */ + if (cfg_specific_test_id == -1 || + cfg_specific_test_id == test - tests) + run_one(test, fdt, fdr, addr, alen); + } +} + +static void run_test(struct sockaddr *addr, socklen_t alen) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + int fdr, fdt, val; + + fdr = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fdr == -1) + error(1, errno, "socket r"); + + if (bind(fdr, addr, alen)) + error(1, errno, "bind"); + + /* Have tests fail quickly instead of hang */ + if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + fdt = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fdt == -1) + error(1, errno, "socket t"); + + /* Do not fragment these datagrams: only succeed if GSO works */ + set_pmtu_discover(fdt, addr->sa_family == AF_INET); + + if (cfg_do_connectionless) { + set_device_mtu(fdt, CONST_MTU_TEST); + run_all(fdt, fdr, addr, alen); + } + + if (cfg_do_connected) { + set_device_mtu(fdt, CONST_MTU_TEST + 100); + set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET); + + if (connect(fdt, addr, alen)) + error(1, errno, "connect"); + + val = get_path_mtu(fdt, addr->sa_family == AF_INET); + if (val != CONST_MTU_TEST) + error(1, 0, "bad path mtu %u\n", val); + + run_all(fdt, fdr, addr, 0 /* use connected addr */); + } + + if (close(fdt)) + error(1, errno, "close t"); + if (close(fdr)) + error(1, errno, "close r"); +} + +static void run_test_v4(void) +{ + struct sockaddr_in addr = {0}; + + addr.sin_family = AF_INET; + addr.sin_port = htons(cfg_port); + addr.sin_addr = addr4; + + run_test((void *)&addr, sizeof(addr)); +} + +static void run_test_v6(void) +{ + struct sockaddr_in6 addr = {0}; + + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = addr6; + + run_test((void *)&addr, sizeof(addr)); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "46cCmst:")) != -1) { + switch (c) { + case '4': + cfg_do_ipv4 = true; + break; + case '6': + cfg_do_ipv6 = true; + break; + case 'c': + cfg_do_connected = true; + break; + case 'C': + cfg_do_connectionless = true; + break; + case 'm': + cfg_do_msgmore = true; + break; + case 's': + cfg_do_setsockopt = true; + break; + case 't': + cfg_specific_test_id = strtoul(optarg, NULL, 0); + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_do_ipv4) + run_test_v4(); + if (cfg_do_ipv6) + run_test_v6(); + + fprintf(stderr, "OK\n"); + return 0; +} diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh new file mode 100755 index 000000000000..fec24f584fe9 --- 
/dev/null
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso regression tests
+
+echo "ipv4 cmsg"
+./in_netns.sh ./udpgso -4 -C
+
+echo "ipv4 setsockopt"
+./in_netns.sh ./udpgso -4 -C -s
+
+echo "ipv6 cmsg"
+./in_netns.sh ./udpgso -6 -C
+
+echo "ipv6 setsockopt"
+./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 000000000000..792fa4d0285e
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+	local -r jobs="$(jobs -p)"
+
+	if [[ "${jobs}" != "" ]]; then
+		kill -1 ${jobs} 2>/dev/null
+	fi
+}
+trap wake_children EXIT
+
+run_one() {
+	local -r args=$@
+
+	./udpgso_bench_rx &
+	./udpgso_bench_rx -t &
+
+	./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+	local -r args=$@
+
+	./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+	local -r args=$@
+
+	echo "udp"
+	run_in_netns ${args}
+
+	echo "udp gso"
+	run_in_netns ${args} -S
+
+	echo "udp gso zerocopy"
+	run_in_netns ${args} -S -z
+}
+
+run_tcp() {
+	local -r args=$@
+
+	echo "tcp"
+	run_in_netns ${args} -t
+
+	echo "tcp zerocopy"
+	run_in_netns ${args} -t -z
+}
+
+run_all() {
+	local -r core_args="-l 4"
+	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+	local -r ipv6_args="${core_args} -6 -D ::1"
+
+	echo "ipv4"
+	run_tcp "${ipv4_args}"
+	run_udp "${ipv4_args}"
+
+	echo "ipv6"
+	run_tcp "${ipv6_args}"
+	run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+	run_all
+elif [[ $1 == "__subprocess" ]]; then
+	shift
+	run_one $@
+else
+	run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 000000000000..727cf67a3f75
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int cfg_port = 8000;
+static bool cfg_tcp;
+static bool cfg_verify;
+
+static bool interrupted;
+static unsigned long packets, bytes;
+
+static void sigint_handler(int signum)
+{
+	if (signum == SIGINT)
+		interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_poll(int fd)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.events = POLLIN;
+	pfd.revents = 0;
+	pfd.fd = fd;
+
+	do {
+		ret = poll(&pfd, 1, 10);
+		if (ret
== -1)
+			error(1, errno, "poll");
+		if (ret == 0)
+			continue;
+		if (pfd.revents != POLLIN)
+			error(1, errno, "poll: 0x%x expected 0x%x\n",
+			      pfd.revents, POLLIN);
+	} while (!ret && !interrupted);
+}
+
+static int do_socket(bool do_tcp)
+{
+	struct sockaddr_in6 addr = {0};
+	int fd, val;
+
+	fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	val = 1 << 21;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+		error(1, errno, "setsockopt rcvbuf");
+	val = 1;
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+		error(1, errno, "setsockopt reuseport");
+
+	addr.sin6_family = PF_INET6;
+	addr.sin6_port = htons(cfg_port);
+	addr.sin6_addr = in6addr_any;
+	if (bind(fd, (void *) &addr, sizeof(addr)))
+		error(1, errno, "bind");
+
+	if (do_tcp) {
+		int accept_fd = fd;
+
+		if (listen(accept_fd, 1))
+			error(1, errno, "listen");
+
+		do_poll(accept_fd);
+
+		fd = accept(accept_fd, NULL, NULL);
+		if (fd == -1)
+			error(1, errno, "accept");
+		if (close(accept_fd))
+			error(1, errno, "close accept fd");
+	}
+
+	return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+	int ret;
+
+	while (true) {
+		/* MSG_TRUNC flushes up to len bytes */
+		ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			return;
+		if (ret == -1)
+			error(1, errno, "flush");
+		if (ret == 0) {
+			/* client detached */
+			exit(0);
+		}
+
+		packets++;
+		bytes += ret;
+	}
+}
+
+static char sanitized_char(char val)
+{
+	return (val >= 'a' && val <= 'z') ? val : '.';
+}
+
+static void do_verify_udp(const char *data, int len)
+{
+	char cur = data[0];
+	int i;
+
+	/* verify contents */
+	if (cur < 'a' || cur > 'z')
+		error(1, 0, "data initial byte out of range");
+
+	for (i = 1; i < len; i++) {
+		if (cur == 'z')
+			cur = 'a';
+		else
+			cur++;
+
+		if (data[i] != cur)
+			error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+			      i, len,
+			      sanitized_char(data[i]), data[i],
+			      sanitized_char(cur), cur);
+	}
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_udp(int fd)
+{
+	static char rbuf[ETH_DATA_LEN];
+	int ret, len, budget = 256;
+
+	len = cfg_verify ? sizeof(rbuf) : 0;
+	while (budget--) {
+		/* MSG_TRUNC will make return value full datagram length */
+		ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			return;
+		if (ret == -1)
+			error(1, errno, "recv");
+		if (len) {
+			if (ret == 0)
+				error(1, errno, "recv: 0 byte datagram\n");
+
+			do_verify_udp(rbuf, ret);
+		}
+
+		packets++;
+		bytes += ret;
+	}
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "p:tv")) != -1) {
+		switch (c) {
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 't':
+			cfg_tcp = true;
+			break;
+		case 'v':
+			cfg_verify = true;
+			break;
+		}
+	}
+
+	if (optind != argc)
+		usage(argv[0]);
+
+	if (cfg_tcp && cfg_verify)
+		error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+static void do_recv(void)
+{
+	unsigned long tnow, treport;
+	int fd;
+
+	fd = do_socket(cfg_tcp);
+
+	treport = gettimeofday_ms() + 1000;
+	do {
+		do_poll(fd);
+
+		if (cfg_tcp)
+			do_flush_tcp(fd);
+		else
+			do_flush_udp(fd);
+
+		tnow = gettimeofday_ms();
+		if (tnow > treport) {
+			if (packets)
+				fprintf(stderr,
+					"%s rx: %6lu MB/s %8lu calls/s\n",
+					cfg_tcp ?
"tcp" : "udp", + bytes >> 20, packets); + bytes = packets = 0; + treport = tnow + 1000; + } + + } while (!interrupted); + + if (close(fd)) + error(1, errno, "close"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + signal(SIGINT, sigint_handler); + + do_recv(); + + return 0; +} diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c new file mode 100644 index 000000000000..e821564053cf --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#ifndef ETH_MAX_MTU +#define ETH_MAX_MTU 0xFFFFU +#endif + +#ifndef UDP_SEGMENT +#define UDP_SEGMENT 103 +#endif + +#ifndef SO_ZEROCOPY +#define SO_ZEROCOPY 60 +#endif + +#ifndef MSG_ZEROCOPY +#define MSG_ZEROCOPY 0x4000000 +#endif + +#define NUM_PKT 100 + +static bool cfg_cache_trash; +static int cfg_cpu = -1; +static int cfg_connected = true; +static int cfg_family = PF_UNSPEC; +static uint16_t cfg_mss; +static int cfg_payload_len = (1472 * 42); +static int cfg_port = 8000; +static int cfg_runtime_ms = -1; +static bool cfg_segment; +static bool cfg_sendmmsg; +static bool cfg_tcp; +static bool cfg_zerocopy; + +static socklen_t cfg_alen; +static struct sockaddr_storage cfg_dst_addr; + +static bool interrupted; +static char buf[NUM_PKT][ETH_MAX_MTU]; + +static void sigint_handler(int signum) +{ + if (signum == SIGINT) + interrupted = true; +} + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static int set_cpu(int cpu) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) + error(1, 0, "setaffinity %d", cpu); + + return 0; +} + +static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + +static void flush_zerocopy(int fd) +{ + struct msghdr msg = {0}; /* flush */ + int ret; + + while (1) { + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "errqueue"); + if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC)) + error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); + msg.msg_flags = 0; + } +} + +static int send_tcp(int fd, char *data) +{ + int ret, done = 0, count = 0; + + while (done < cfg_payload_len) { + ret = send(fd, data + done, cfg_payload_len - done, + cfg_zerocopy ? 
MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "write"); + + done += ret; + count++; + } + + return count; +} + +static int send_udp(int fd, char *data) +{ + int ret, total_len, len, count = 0; + + total_len = cfg_payload_len; + + while (total_len) { + len = total_len < cfg_mss ? total_len : cfg_mss; + + ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0, + cfg_connected ? NULL : (void *)&cfg_dst_addr, + cfg_connected ? 0 : cfg_alen); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, errno, "write: %uB != %uB\n", ret, len); + + total_len -= len; + count++; + } + + return count; +} + +static int send_udp_sendmmsg(int fd, char *data) +{ + const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN; + struct mmsghdr mmsgs[max_nr_msg]; + struct iovec iov[max_nr_msg]; + unsigned int off = 0, left; + int i = 0, ret; + + memset(mmsgs, 0, sizeof(mmsgs)); + + left = cfg_payload_len; + while (left) { + if (i == max_nr_msg) + error(1, 0, "sendmmsg: exceeds max_nr_msg"); + + iov[i].iov_base = data + off; + iov[i].iov_len = cfg_mss < left ? cfg_mss : left; + + mmsgs[i].msg_hdr.msg_iov = iov + i; + mmsgs[i].msg_hdr.msg_iovlen = 1; + + off += iov[i].iov_len; + left -= iov[i].iov_len; + i++; + } + + ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "sendmmsg"); + + return ret; +} + +static void send_udp_segment_cmsg(struct cmsghdr *cm) +{ + uint16_t *valp; + + cm->cmsg_level = SOL_UDP; + cm->cmsg_type = UDP_SEGMENT; + cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss)); + valp = (void *)CMSG_DATA(cm); + *valp = cfg_mss; +} + +static int send_udp_segment(int fd, char *data) +{ + char control[CMSG_SPACE(sizeof(cfg_mss))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + int ret; + + iov.iov_base = data; + iov.iov_len = cfg_payload_len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg)); + + msg.msg_name = (void *)&cfg_dst_addr; + msg.msg_namelen = cfg_alen; + + ret = sendmsg(fd, &msg, cfg_zerocopy ? 
MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "sendmsg"); + if (ret != iov.iov_len) + error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len); + + return 1; +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]", + filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int max_len, hdrlen; + int c; + + while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) { + switch (c) { + case '4': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'c': + cfg_cache_trash = true; + break; + case 'C': + cfg_cpu = strtol(optarg, NULL, 0); + break; + case 'D': + setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + break; + case 'l': + cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; + break; + case 'm': + cfg_sendmmsg = true; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 's': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'S': + cfg_segment = true; + break; + case 't': + cfg_tcp = true; + break; + case 'u': + cfg_connected = false; + break; + case 'z': + cfg_zerocopy = true; + break; + } + } + + if (optind != argc) + usage(argv[0]); + + if (cfg_family == PF_UNSPEC) + error(1, 0, "must pass one of -4 or -6"); + if (cfg_tcp && !cfg_connected) + error(1, 0, "connectionless tcp makes no sense"); + if (cfg_segment && cfg_sendmmsg) + error(1, 0, "cannot combine segment offload and sendmmsg"); + + if (cfg_family == PF_INET) + hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr); + else + hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr); + + cfg_mss = ETH_DATA_LEN - hdrlen; + max_len = ETH_MAX_MTU - hdrlen; + + if (cfg_payload_len > max_len) + error(1, 0, "payload length %u exceeds max %u", + cfg_payload_len, max_len); +} + +static void set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + if (setsockopt(fd, level, name, &val, sizeof(val))) + error(1, errno, "setsockopt path mtu"); +} + +int main(int argc, char **argv) +{ + unsigned long num_msgs, num_sends; + unsigned long tnow, treport, tstop; + int fd, i, val; + + parse_opts(argc, argv); + + if (cfg_cpu > 0) + set_cpu(cfg_cpu); + + for (i = 0; i < sizeof(buf[0]); i++) + buf[0][i] = 'a' + (i % 26); + for (i = 1; i < NUM_PKT; i++) + memcpy(buf[i], buf[0], sizeof(buf[0])); + + signal(SIGINT, sigint_handler); + + fd = socket(cfg_family, cfg_tcp ? 
SOCK_STREAM : SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket"); + + if (cfg_zerocopy) { + val = 1; + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) + error(1, errno, "setsockopt zerocopy"); + } + + if (cfg_connected && + connect(fd, (void *)&cfg_dst_addr, cfg_alen)) + error(1, errno, "connect"); + + if (cfg_segment) + set_pmtu_discover(fd, cfg_family == PF_INET); + + num_msgs = num_sends = 0; + tnow = gettimeofday_ms(); + tstop = tnow + cfg_runtime_ms; + treport = tnow + 1000; + + i = 0; + do { + if (cfg_tcp) + num_sends += send_tcp(fd, buf[i]); + else if (cfg_segment) + num_sends += send_udp_segment(fd, buf[i]); + else if (cfg_sendmmsg) + num_sends += send_udp_sendmmsg(fd, buf[i]); + else + num_sends += send_udp(fd, buf[i]); + num_msgs++; + + if (cfg_zerocopy && ((num_msgs & 0xF) == 0)) + flush_zerocopy(fd); + + tnow = gettimeofday_ms(); + if (tnow > treport) { + fprintf(stderr, + "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n", + cfg_tcp ? "tcp" : "udp", + (num_msgs * cfg_payload_len) >> 20, + num_sends, num_msgs); + num_msgs = num_sends = 0; + treport = tnow + 1000; + } + + /* cold cache when writing buffer */ + if (cfg_cache_trash) + i = ++i < NUM_PKT ? i : 0; + + } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); + + if (close(fd)) + error(1, errno, "close"); + + return 0; +} diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json index 93cf8fea8ae7..3a2f51fc7fd4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json @@ -398,13 +398,83 @@ 255 ] ], - "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args", - "expExitCode": "255", + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", + "expExitCode": "0", "verifyCmd": "$TC actions ls action csum", "matchPattern": "^[ \t]+index [0-9]* ref", "matchCount": "32", "teardown": [ "$TC actions flush action csum" ] + }, + { + "id": "b4e9", + "name": "Delete batch of 32 csum actions", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ], + "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"" + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action csum", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "0015", + "name": "Add batch of 32 csum tcp actions with large cookies", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action csum", + "matchPattern": "^[ \t]+index [0-9]* ref", + "matchCount": "32", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "989e", + "name": "Delete batch of 32 csum actions with large cookies", + "category": [ + "actions", + 
"csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ], + "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"" + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action csum", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "0", + "teardown": [] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json index 9f34f0753969..03777730ef29 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -1,7 +1,7 @@ [ { - "id": "a568", - "name": "Add action with ife type", + "id": "7682", + "name": "Create valid ife encode action with mark and pass control", "category": [ "actions", "ife" @@ -12,21 +12,710 @@ 0, 1, 255 - ], - "$TC actions add action ife encode type 0xDEAD index 1" + ] ], - "cmdUnderTest": "$TC actions get action ife index 1", + "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "ef47", + "name": "Create valid ife encode action with mark and pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "df43", + "name": "Create valid ife encode action with mark and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "e4cf", + "name": "Create valid ife encode action with mark and drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "ccba", + "name": "Create valid ife encode action with mark and reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2", + "expExitCode": "0", + 
"verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "a1cf", + "name": "Create valid ife encode action with mark and jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "cb3d", + "name": "Create valid ife encode action with mark value at 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 90", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "1efb", + "name": "Create ife encode action with mark value exceeding 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 90", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark 4294967295999.*index 90", + "matchCount": "0", + "teardown": [] + }, + { + "id": "95ed", + "name": "Create valid ife encode action with prio and pass control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "aa17", + "name": "Create valid ife encode action with prio and pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "74c7", + "name": "Create valid ife encode action with prio and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action 
skbedit" + ] + }, + { + "id": "7a97", + "name": "Create valid ife encode action with prio and drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "f66b", + "name": "Create valid ife encode action with prio and reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "3056", + "name": "Create valid ife encode action with prio and jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "7dd3", + "name": "Create valid ife encode action with prio value at 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 99", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "2ca1", + "name": "Create ife encode action with prio value exceeding 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 99", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 4294967298.*index 99", + "matchCount": "0", + "teardown": [] + }, + { + "id": "05bb", + "name": "Create valid ife encode action with tcindex and pass control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "ce65", + "name": "Create valid ife encode action with tcindex and pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", 
+ 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 111.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "09cd", + "name": "Create valid ife encode action with tcindex and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "8eb5", + "name": "Create valid ife encode action with tcindex and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "451a", + "name": "Create valid ife encode action with tcindex and drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 77", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow tcindex.*index 77", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "d76c", + "name": "Create valid ife encode action with tcindex and reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 77", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*allow tcindex.*index 77", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "e731", + "name": "Create valid ife encode action with tcindex and jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 77", + "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0xED3E.*allow tcindex.*index 77", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "b7b8", + "name": "Create valid ife encode action with tcindex value at 16-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 
1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*use tcindex 65535.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "d0d8", + "name": "Create ife encode action with tcindex value exceeding 16-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 65539.*index 1", + "matchCount": "0", + "teardown": [] + }, + { + "id": "2a9c", + "name": "Create valid ife encode action with mac src parameter", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow mark src 00:11:22:33:44:55.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "cf5c", + "name": "Create valid ife encode action with mac dst parameter", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "2353", + "name": "Create valid ife encode action with mac src and mac dst parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 11", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "552c", + "name": "Create valid ife encode action with mark and type parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "type 0xDEAD", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xFEFE.*use mark 7.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "0421", + "name": "Create valid ife encode action with prio and type parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 21", + "matchPattern": "action order 
[0-9]*: ife encode action pipe.*type 0xABBA.*use prio 444.*index 21", "matchCount": "1", "teardown": [ "$TC actions flush action ife" ] }, { - "id": "b983", - "name": "Add action without ife type", + "id": "4017", + "name": "Create valid ife encode action with tcindex and type parameters", "category": [ "actions", "ife" @@ -37,16 +726,339 @@ 0, 1, 255 - ], - "$TC actions add action ife encode index 1" + ] ], - "cmdUnderTest": "$TC actions get action ife index 1", + "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 21", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xABCD.*use tcindex 5000.*index 21", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "fac3", + "name": "Create valid ife encode action with index at 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 4294967295", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "7c25", + "name": "Create valid ife decode action with pass control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "type 0xED3E", + "matchPattern": "action order [0-9]*: ife decode action pass.*type 0x0.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" ] + }, + { + "id": "dccb", + "name": "Create valid ife decode action with pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "7bb9", + "name": "Create valid ife decode action with continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action continue.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "d9ad", + "name": "Create valid ife decode action with drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode drop index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action drop.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": 
"1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "219f", + "name": "Create valid ife decode action with reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode reclassify index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "8f44", + "name": "Create valid ife decode action with jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "56cf", + "name": "Create ife encode action with index exceeding 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4294967295999", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295999", + "matchCount": "0", + "teardown": [] + }, + { + "id": "ee94", + "name": "Create ife encode action with invalid control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0xED3E.*allow mark.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "b330", + "name": "Create ife encode action with cookie", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "bbc0", + "name": "Create ife encode action with invalid argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow foo.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "d54a", + "name": "Create ife encode action with invalid type argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action 
ife encode allow prio type 70000 pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0x11170.*allow prio.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "7ee0", + "name": "Create ife encode action with invalid mac src argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio src 00:11:22:33:44:pp pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "0a7d", + "name": "Create ife encode action with invalid mac dst argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio dst 00.111-22:33:44:aa pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4", + "matchCount": "0", + "teardown": [] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json new file mode 100644 index 000000000000..3aca33c00039 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json @@ -0,0 +1,588 @@ +[ + { + "id": "9784", + "name": "Add valid sample action with mandatory arguments", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 2", + "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "5c91", + "name": "Add valid sample action with mandatory arguments and continue control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 2", + "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "334b", + "name": "Add valid sample action with mandatory arguments and drop control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "da69", + "name": "Add valid sample action with mandatory arguments and reclassify control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 
reclassify index 100", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "13ce", + "name": "Add valid sample action with mandatory arguments and pipe control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "1886", + "name": "Add valid sample action with mandatory arguments and jump control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 200", + "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "b6d4", + "name": "Add sample action with mandatory arguments and invalid control action", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "a874", + "name": "Add invalid sample action without mandatory arguments", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample.*index 1 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "ac01", + "name": "Add invalid sample action without mandatory argument rate", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample group 10 index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "4203", + "name": "Add invalid sample action without mandatory argument group", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 100 index 10", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "14a7", + "name": "Add invalid sample action without mandatory argument group", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC 
actions add action sample rate 100 index 10", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "8f2e", + "name": "Add valid sample action with trunc argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "45f8", + "name": "Add sample action with maximum rate argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "ad0c", + "name": "Add sample action with maximum trunc argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "83a9", + "name": "Add sample action with maximum group argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 1", + "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "ed27", + "name": "Add sample action with invalid rate argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 10", + "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "2eae", + "name": "Add sample action with invalid group argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 1", + "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "6ff3", + "name": "Add sample action with invalid trunc 
size", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 11", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "2b2a", + "name": "Add sample action with invalid index", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 5294967299", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "dee2", + "name": "Add sample action with maximum allowed index", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 4294967295", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "560e", + "name": "Add sample action with cookie", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd", + "expExitCode": "0", + "verifyCmd": "$TC actions get action sample index 45", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "704a", + "name": "Replace existing sample action with new rate argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 index 4" + ], + "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "60eb", + "name": "Replace existing sample action with new group argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 index 4" + ], + "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "2cce", + "name": "Replace existing sample action with new trunc argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 trunc 48 index 4" + ], + "cmdUnderTest": "$TC actions 
replace action sample rate 1024 group 7 trunc 64 index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + }, + { + "id": "59d1", + "name": "Replace existing sample action with new control argument", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 reclassify index 4" + ], + "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4", + "expExitCode": "0", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] + } +]
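
Each entry above follows the tdc JSON schema from tools/testing/selftests/tc-testing: a "setup" list of commands with their accepted exit codes, a "cmdUnderTest", an "expExitCode", a "verifyCmd", and a "matchPattern" regex that must match the verify output "matchCount" times. A minimal sketch of exercising the new ife and sample cases with the tdc harness follows; the flags are taken from the tc-testing README, but the exact invocation and the iproute2 version needed for these match patterns may vary, so treat this as an assumed example rather than a definitive recipe:

    # Run from tools/testing/selftests/tc-testing/ as root,
    # with $TC pointing at a sufficiently recent tc binary.
    ./tdc.py -l                                # list all known test cases
    ./tdc.py -f tc-tests/actions/sample.json   # run only the sample action tests
    ./tdc.py -e 9784                           # run a single test case by id

Because every test flushes its action table in setup and teardown, the cases are intended to be order-independent and safe to run individually.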