diff options
194 files changed, 7946 insertions, 1506 deletions
diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index 261c563b5f06..eba0e5e59ebe 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -22,6 +22,11 @@ Required properties: - snps,pbl Programmable Burst Length - snps,fixed-burst Program the DMA to use the fixed burst mode - snps,mixed-burst Program the DMA to use the mixed burst mode +- snps,force_thresh_dma_mode Force DMA to use the threshold mode for + both tx and rx +- snps,force_sf_dma_mode Force DMA to use the Store and Forward + mode for both tx and rx. This flag is + ignored if force_thresh_dma_mode is set. Optional properties: - mac-address: 6 bytes, mac address diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index debfe857d8f9..1cb3aeb4baff 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -482,6 +482,15 @@ tcp_syn_retries - INTEGER tcp_timestamps - BOOLEAN Enable timestamps as defined in RFC1323. +tcp_min_tso_segs - INTEGER + Minimal number of segments per TSO frame. + Since linux-3.12, TCP does an automatic sizing of TSO frames, + depending on flow rate, instead of filling 64Kbytes packets. + For specific usages, it's possible to force TCP to build big + TSO frames. Note that TCP stack might split too big TSO packets + if available window is too small. + Default: 2 + tcp_tso_win_divisor - INTEGER This allows control over what percentage of the congestion window can be consumed by a single TSO frame. @@ -1349,6 +1358,12 @@ mldv2_unsolicited_report_interval - INTEGER MLDv2 report retransmit will take place. Default: 1000 (1 second) +suppress_frag_ndisc - INTEGER + Control RFC 6980 (Security Implications of IPv6 Fragmentation + with IPv6 Neighbor Discovery) behavior: + 1 - (default) discard fragmented neighbor discovery packets + 0 - allow fragmented neighbor discovery packets + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 8572796b1eb6..c01223628a87 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -543,6 +543,14 @@ TPACKET_V2 --> TPACKET_V3: In the AF_PACKET fanout mode, packet reception can be load balanced among processes. This also works in combination with mmap(2) on packet sockets. +Currently implemented fanout policies are: + + - PACKET_FANOUT_HASH: schedule to socket by skb's rxhash + - PACKET_FANOUT_LB: schedule to socket by round-robin + - PACKET_FANOUT_CPU: schedule to socket by CPU packet arrives on + - PACKET_FANOUT_RND: schedule to socket by random selection + - PACKET_FANOUT_ROLLOVER: if one socket is full, rollover to another + Minimal example code by David S. Miller (try things like "./test eth0 hash", "./test eth0 lb", etc.): diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index 654d2e55c8cb..457b8bbafb08 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt @@ -123,6 +123,7 @@ struct plat_stmmacenet_data { int bugged_jumbo; int pmt; int force_sf_dma_mode; + int force_thresh_dma_mode; int riwt_off; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); @@ -159,6 +160,8 @@ Where: o pmt: core has the embedded power module (optional). o force_sf_dma_mode: force DMA to use the Store and Forward mode instead of the Threshold. + o force_thresh_dma_mode: force DMA to use the Shreshold mode other than + the Store and Forward mode. o riwt_off: force to disable the RX watchdog feature and switch to NAPI mode. o fix_mac_speed: this callback is used for modifying some syscfg registers (on ST SoCs) according to the link speed negotiated by the diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index d569f2a424d5..9a0319a82470 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -50,6 +50,19 @@ The maximum number of packets that kernel can handle on a NAPI interrupt, it's a Per-CPU variable. Default: 64 +default_qdisc +-------------- + +The default queuing discipline to use for network devices. This allows +overriding the default queue discipline of pfifo_fast with an +alternative. Since the default queuing discipline is created with the +no additional parameters so is best suited to queuing disciplines that +work well without configuration like stochastic fair queue (sfq), +CoDel (codel) or fair queue CoDel (fq_codel). Don't use queuing disciplines +like Hierarchical Token Bucket or Deficit Round Robin which require setting +up classes and bandwidths. +Default: pfifo_fast + busy_read ---------------- Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL) diff --git a/arch/arm/boot/dts/sama5d3xmb.dtsi b/arch/arm/boot/dts/sama5d3xmb.dtsi index e9521d58e9c1..dba739b6ef36 100644 --- a/arch/arm/boot/dts/sama5d3xmb.dtsi +++ b/arch/arm/boot/dts/sama5d3xmb.dtsi @@ -84,7 +84,7 @@ #address-cells = <1>; #size-cells = <0>; - phy0: ethernet-phy@0 { + phy0: ethernet-phy@1 { interrupt-parent = <&pioE>; interrupts = <30 IRQ_TYPE_EDGE_FALLING>; reg = <1>; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 3a5db7b1df68..018235263596 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -971,58 +971,62 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) /*********************** tlb/rlb shared functions *********************/ -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], + u16 vid) { - struct bonding *bond = bond_get_bond_by_slave(slave); struct learning_pkt pkt; + struct sk_buff *skb; int size = sizeof(struct learning_pkt); - int i; + char *data; memset(&pkt, 0, size); memcpy(pkt.mac_dst, mac_addr, ETH_ALEN); memcpy(pkt.mac_src, mac_addr, ETH_ALEN); pkt.type = cpu_to_be16(ETH_P_LOOP); - for (i = 0; i < MAX_LP_BURST; i++) { - struct sk_buff *skb; - char *data; + skb = dev_alloc_skb(size); + if (!skb) + return; + + data = skb_put(skb, size); + memcpy(data, &pkt, size); - skb = dev_alloc_skb(size); + skb_reset_mac_header(skb); + skb->network_header = skb->mac_header + ETH_HLEN; + skb->protocol = pkt.type; + skb->priority = TC_PRIO_CONTROL; + skb->dev = slave->dev; + + if (vid) { + skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vid); if (!skb) { + pr_err("%s: Error: failed to insert VLAN tag\n", + slave->bond->dev->name); return; } + } - data = skb_put(skb, size); - memcpy(data, &pkt, size); - - skb_reset_mac_header(skb); - skb->network_header = skb->mac_header + ETH_HLEN; - skb->protocol = pkt.type; - skb->priority = TC_PRIO_CONTROL; - skb->dev = slave->dev; - - if (bond_vlan_used(bond)) { - struct vlan_entry *vlan; + dev_queue_xmit(skb); +} - vlan = bond_next_vlan(bond, - bond->alb_info.current_alb_vlan); - bond->alb_info.current_alb_vlan = vlan; - if (!vlan) { - kfree_skb(skb); - continue; - } +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +{ + struct bonding *bond = bond_get_bond_by_slave(slave); + struct net_device *upper; + struct list_head *iter; - skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan->vlan_id); - if (!skb) { - pr_err("%s: Error: failed to insert VLAN tag\n", - bond->dev->name); - continue; - } - } + /* send untagged */ + alb_send_lp_vid(slave, mac_addr, 0); - dev_queue_xmit(skb); + /* loop through vlans and send one packet for each */ + rcu_read_lock(); + netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { + if (upper->priv_flags & IFF_802_1Q_VLAN) + alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_id(upper)); } + rcu_read_unlock(); } static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[]) @@ -1759,11 +1763,6 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id) { - if (bond->alb_info.current_alb_vlan && - (bond->alb_info.current_alb_vlan->vlan_id == vlan_id)) { - bond->alb_info.current_alb_vlan = NULL; - } - if (bond->alb_info.rlb_enabled) { rlb_clear_vlan(bond, vlan_id); } diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h index e7a5b8b37ea3..e02c9c558c41 100644 --- a/drivers/net/bonding/bond_alb.h +++ b/drivers/net/bonding/bond_alb.h @@ -53,7 +53,6 @@ struct slave; #define TLB_NULL_INDEX 0xffffffff -#define MAX_LP_BURST 3 /* rlb defs */ #define RLB_HASH_TABLE_SIZE 256 @@ -170,7 +169,6 @@ struct alb_bond_info { * rx traffic should be * rebalanced */ - struct vlan_entry *current_alb_vlan; }; int bond_alb_initialize(struct bonding *bond, int rlb_enabled); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 7407e65f5d96..c50679f0a6b6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -283,116 +283,6 @@ const char *bond_mode_name(int mode) /*---------------------------------- VLAN -----------------------------------*/ /** - * bond_add_vlan - add a new vlan id on bond - * @bond: bond that got the notification - * @vlan_id: the vlan id to add - * - * Returns -ENOMEM if allocation failed. - */ -static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) -{ - struct vlan_entry *vlan; - - pr_debug("bond: %s, vlan id %d\n", - (bond ? bond->dev->name : "None"), vlan_id); - - vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL); - if (!vlan) - return -ENOMEM; - - INIT_LIST_HEAD(&vlan->vlan_list); - vlan->vlan_id = vlan_id; - - write_lock_bh(&bond->lock); - - list_add_tail(&vlan->vlan_list, &bond->vlan_list); - - write_unlock_bh(&bond->lock); - - pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); - - return 0; -} - -/** - * bond_del_vlan - delete a vlan id from bond - * @bond: bond that got the notification - * @vlan_id: the vlan id to delete - * - * returns -ENODEV if @vlan_id was not found in @bond. - */ -static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) -{ - struct vlan_entry *vlan; - int res = -ENODEV; - - pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); - - block_netpoll_tx(); - write_lock_bh(&bond->lock); - - list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { - if (vlan->vlan_id == vlan_id) { - list_del(&vlan->vlan_list); - - if (bond_is_lb(bond)) - bond_alb_clear_vlan(bond, vlan_id); - - pr_debug("removed VLAN ID %d from bond %s\n", - vlan_id, bond->dev->name); - - kfree(vlan); - - res = 0; - goto out; - } - } - - pr_debug("couldn't find VLAN ID %d in bond %s\n", - vlan_id, bond->dev->name); - -out: - write_unlock_bh(&bond->lock); - unblock_netpoll_tx(); - return res; -} - -/** - * bond_next_vlan - safely skip to the next item in the vlans list. - * @bond: the bond we're working on - * @curr: item we're advancing from - * - * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, - * or @curr->next otherwise (even if it is @curr itself again). - * - * Caller must hold bond->lock - */ -struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) -{ - struct vlan_entry *next, *last; - - if (list_empty(&bond->vlan_list)) - return NULL; - - if (!curr) { - next = list_entry(bond->vlan_list.next, - struct vlan_entry, vlan_list); - } else { - last = list_entry(bond->vlan_list.prev, - struct vlan_entry, vlan_list); - if (last == curr) { - next = list_entry(bond->vlan_list.next, - struct vlan_entry, vlan_list); - } else { - next = list_entry(curr->vlan_list.next, - struct vlan_entry, vlan_list); - } - } - - return next; -} - -/** * bond_dev_queue_xmit - Prepare skb for xmit. * * @bond: bond device that got this skb for tx. @@ -451,13 +341,6 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev, goto unwind; } - res = bond_add_vlan(bond, vid); - if (res) { - pr_err("%s: Error: Failed to add vlan id %d\n", - bond_dev->name, vid); - goto unwind; - } - return 0; unwind: @@ -478,17 +361,12 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, { struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; - int res; bond_for_each_slave(bond, slave) vlan_vid_del(slave->dev, proto, vid); - res = bond_del_vlan(bond, vid); - if (res) { - pr_err("%s: Error: Failed to remove vlan id %d\n", - bond_dev->name, vid); - return res; - } + if (bond_is_lb(bond)) + bond_alb_clear_vlan(bond, vid); return 0; } @@ -1954,7 +1832,7 @@ static int __bond_release_one(struct net_device *bond_dev, bond_set_carrier(bond); eth_hw_addr_random(bond_dev); - if (bond_vlan_used(bond)) { + if (vlan_uses_dev(bond_dev)) { pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n", bond_dev->name, bond_dev->name); pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", @@ -2392,24 +2270,25 @@ re_arm: } } -static int bond_has_this_ip(struct bonding *bond, __be32 ip) +static bool bond_has_this_ip(struct bonding *bond, __be32 ip) { - struct vlan_entry *vlan; - struct net_device *vlan_dev; + struct net_device *upper; + struct list_head *iter; + bool ret = false; if (ip == bond_confirm_addr(bond->dev, 0, ip)) - return 1; + return true; - list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { - rcu_read_lock(); - vlan_dev = __vlan_find_dev_deep(bond->dev, htons(ETH_P_8021Q), - vlan->vlan_id); - rcu_read_unlock(); - if (vlan_dev && ip == bond_confirm_addr(vlan_dev, 0, ip)) - return 1; + rcu_read_lock(); + netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { + if (ip == bond_confirm_addr(upper, 0, ip)) { + ret = true; + break; + } } + rcu_read_unlock(); - return 0; + return ret; } /* @@ -2444,81 +2323,79 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) { - int i, vlan_id; - __be32 *targets = bond->params.arp_targets; - struct vlan_entry *vlan; - struct net_device *vlan_dev = NULL; + struct net_device *upper, *vlan_upper; + struct list_head *iter, *vlan_iter; struct rtable *rt; + __be32 *targets = bond->params.arp_targets, addr; + int i, vlan_id; - for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { - __be32 addr; - if (!targets[i]) - break; + for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) { pr_debug("basa: target %pI4\n", &targets[i]); - if (!bond_vlan_used(bond)) { - pr_debug("basa: empty vlan: arp_send\n"); - addr = bond_confirm_addr(bond->dev, targets[i], 0); - bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, 0); - continue; - } - /* - * If VLANs are configured, we do a route lookup to - * determine which VLAN interface would be used, so we - * can tag the ARP with the proper VLAN tag. - */ + /* Find out through which dev should the packet go */ rt = ip_route_output(dev_net(bond->dev), targets[i], 0, RTO_ONLINK, 0); if (IS_ERR(rt)) { - if (net_ratelimit()) { - pr_warning("%s: no route to arp_ip_target %pI4\n", - bond->dev->name, &targets[i]); - } + pr_debug("%s: no route to arp_ip_target %pI4\n", + bond->dev->name, &targets[i]); continue; } - /* - * This target is not on a VLAN + vlan_id = 0; + + /* bond device itself */ + if (rt->dst.dev == bond->dev) + goto found; + + rcu_read_lock(); + /* first we search only for vlan devices. for every vlan + * found we verify its upper dev list, searching for the + * rt->dst.dev. If found we save the tag of the vlan and + * proceed to send the packet. + * + * TODO: QinQ? */ - if (rt->dst.dev == bond->dev) { - ip_rt_put(rt); - pr_debug("basa: rtdev == bond->dev: arp_send\n"); - addr = bond_confirm_addr(bond->dev, targets[i], 0); - bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, 0); - continue; + netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) { + if (!is_vlan_dev(vlan_upper)) + continue; + netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) { + if (upper == rt->dst.dev) { + vlan_id = vlan_dev_vlan_id(vlan_upper); + rcu_read_unlock(); + goto found; + } + } } - vlan_id = 0; - list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { - rcu_read_lock(); - vlan_dev = __vlan_find_dev_deep(bond->dev, - htons(ETH_P_8021Q), - vlan->vlan_id); - rcu_read_unlock(); - if (vlan_dev == rt->dst.dev) { - vlan_id = vlan->vlan_id; - pr_debug("basa: vlan match on %s %d\n", - vlan_dev->name, vlan_id); - break; + /* if the device we're looking for is not on top of any of + * our upper vlans, then just search for any dev that + * matches, and in case it's a vlan - save the id + */ + netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { + if (upper == rt->dst.dev) { + /* if it's a vlan - get its VID */ + if (is_vlan_dev(upper)) + vlan_id = vlan_dev_vlan_id(upper); + + rcu_read_unlock(); + goto found; } } + rcu_read_unlock(); - if (vlan_id && vlan_dev) { - ip_rt_put(rt); - addr = bond_confirm_addr(vlan_dev, targets[i], 0); - bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, vlan_id); - continue; - } + /* Not our device - skip */ + pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", + bond->dev->name, &targets[i], + rt->dst.dev ? rt->dst.dev->name : "NULL"); - if (net_ratelimit()) { - pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", - bond->dev->name, &targets[i], - rt->dst.dev ? rt->dst.dev->name : "NULL"); - } ip_rt_put(rt); + continue; + +found: + addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); + ip_rt_put(rt); + bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], + addr, vlan_id); } } @@ -4142,7 +4019,6 @@ static void bond_setup(struct net_device *bond_dev) /* Initialize pointers */ bond->dev = bond_dev; - INIT_LIST_HEAD(&bond->vlan_list); /* Initialize the device entry points */ ether_setup(bond_dev); @@ -4195,7 +4071,6 @@ static void bond_uninit(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *tmp_slave; - struct vlan_entry *vlan, *tmp; bond_netpoll_cleanup(bond_dev); @@ -4207,11 +4082,6 @@ static void bond_uninit(struct net_device *bond_dev) list_del(&bond->bond_list); bond_debug_unregister(bond); - - list_for_each_entry_safe(vlan, tmp, &bond->vlan_list, vlan_list) { - list_del(&vlan->vlan_list); - kfree(vlan); - } } /*------------------------- Module initialization ---------------------------*/ diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 4bf52d5f637e..4abc925823e1 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -185,11 +185,6 @@ struct bond_parm_tbl { #define BOND_MAX_MODENAME_LEN 20 -struct vlan_entry { - struct list_head vlan_list; - unsigned short vlan_id; -}; - struct slave { struct net_device *dev; /* first - useful for panic debug */ struct list_head list; @@ -254,7 +249,6 @@ struct bonding { struct ad_bond_info ad_info; struct alb_bond_info alb_info; struct bond_params params; - struct list_head vlan_list; struct workqueue_struct *wq; struct delayed_work mii_work; struct delayed_work arp_work; @@ -267,9 +261,22 @@ struct bonding { #endif /* CONFIG_DEBUG_FS */ }; +/* if we hold rtnl_lock() - call vlan_uses_dev() */ static inline bool bond_vlan_used(struct bonding *bond) { - return !list_empty(&bond->vlan_list); + struct net_device *upper; + struct list_head *iter; + + rcu_read_lock(); + netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { + if (upper->priv_flags & IFF_802_1Q_VLAN) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; } #define bond_slave_get_rcu(dev) \ diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index b7232a9b7756..f92f001551da 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -840,7 +840,7 @@ static int ax_probe(struct platform_device *pdev) ei_local = netdev_priv(dev); ax = to_ax_dev(dev); - ax->plat = pdev->dev.platform_data; + ax->plat = dev_get_platdata(&pdev->dev); platform_set_drvdata(pdev, dev); ei_local->rxcr_base = ax->plat->rcr_val; diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index e904b3838dcc..e66684a438f5 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1647,12 +1647,12 @@ static int bfin_mac_probe(struct platform_device *pdev) setup_mac_addr(ndev->dev_addr); - if (!pdev->dev.platform_data) { + if (!dev_get_platdata(&pdev->dev)) { dev_err(&pdev->dev, "Cannot get platform device bfin_mii_bus!\n"); rc = -ENODEV; goto out_err_probe_mac; } - pd = pdev->dev.platform_data; + pd = dev_get_platdata(&pdev->dev); lp->mii_bus = platform_get_drvdata(pd); if (!lp->mii_bus) { dev_err(&pdev->dev, "Cannot get mii_bus!\n"); @@ -1660,7 +1660,7 @@ static int bfin_mac_probe(struct platform_device *pdev) goto out_err_probe_mac; } lp->mii_bus->priv = ndev; - mii_bus_data = pd->dev.platform_data; + mii_bus_data = dev_get_platdata(&pd->dev); rc = mii_probe(ndev, mii_bus_data->phy_mode); if (rc) { diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 7ff4b30d55ea..e06694555144 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1464,18 +1464,18 @@ static int greth_of_probe(struct platform_device *ofdev) } /* Allocate TX descriptor ring in coherent memory */ - greth->tx_bd_base = dma_alloc_coherent(greth->dev, 1024, - &greth->tx_bd_base_phys, - GFP_KERNEL | __GFP_ZERO); + greth->tx_bd_base = dma_zalloc_coherent(greth->dev, 1024, + &greth->tx_bd_base_phys, + GFP_KERNEL); if (!greth->tx_bd_base) { err = -ENOMEM; goto error3; } /* Allocate RX descriptor ring in coherent memory */ - greth->rx_bd_base = dma_alloc_coherent(greth->dev, 1024, - &greth->rx_bd_base_phys, - GFP_KERNEL | __GFP_ZERO); + greth->rx_bd_base = dma_zalloc_coherent(greth->dev, 1024, + &greth->rx_bd_base_phys, + GFP_KERNEL); if (!greth->rx_bd_base) { err = -ENOMEM; goto error4; diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index ceb45bc963a9..91d52b495848 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1131,7 +1131,7 @@ static int au1000_probe(struct platform_device *pdev) writel(0, aup->enable); aup->mac_enabled = 0; - pd = pdev->dev.platform_data; + pd = dev_get_platdata(&pdev->dev); if (!pd) { dev_info(&pdev->dev, "no platform_data passed," " PHY search on MAC0\n"); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 190219e02400..8ac48fbf8a66 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -948,8 +948,7 @@ static int bcm_enet_open(struct net_device *dev) /* allocate rx dma ring */ size = priv->rx_ring_size * sizeof(struct bcm_enet_desc); - p = dma_alloc_coherent(kdev, size, &priv->rx_desc_dma, - GFP_KERNEL | __GFP_ZERO); + p = dma_zalloc_coherent(kdev, size, &priv->rx_desc_dma, GFP_KERNEL); if (!p) { ret = -ENOMEM; goto out_freeirq_tx; @@ -960,8 +959,7 @@ static int bcm_enet_open(struct net_device *dev) /* allocate tx dma ring */ size = priv->tx_ring_size * sizeof(struct bcm_enet_desc); - p = dma_alloc_coherent(kdev, size, &priv->tx_desc_dma, - GFP_KERNEL | __GFP_ZERO); + p = dma_zalloc_coherent(kdev, size, &priv->tx_desc_dma, GFP_KERNEL); if (!p) { ret = -ENOMEM; goto out_free_rx_ring; @@ -1800,7 +1798,7 @@ static int bcm_enet_probe(struct platform_device *pdev) priv->rx_ring_size = BCMENET_DEF_RX_DESC; priv->tx_ring_size = BCMENET_DEF_TX_DESC; - pd = pdev->dev.platform_data; + pd = dev_get_platdata(&pdev->dev); if (pd) { memcpy(dev->dev_addr, pd->mac_addr, ETH_ALEN); priv->has_phy = pd->has_phy; @@ -1964,7 +1962,7 @@ static int bcm_enet_remove(struct platform_device *pdev) } else { struct bcm63xx_enet_platform_data *pd; - pd = pdev->dev.platform_data; + pd = dev_get_platdata(&pdev->dev); if (pd && pd->mii_config) pd->mii_config(dev, 0, bcm_enet_mdio_read_mii, bcm_enet_mdio_write_mii); @@ -2742,7 +2740,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev) priv->tx_ring_size = BCMENET_DEF_TX_DESC; priv->dma_maxburst = BCMENETSW_DMA_MAXBURST; - pd = pdev->dev.platform_data; + pd = dev_get_platdata(&pdev->dev); if (pd) { memcpy(dev->dev_addr, pd->mac_addr, ETH_ALEN); memcpy(priv->used_ports, pd->used_ports, diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 4148058cef81..e838a3f74b69 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -853,9 +853,8 @@ bnx2_alloc_mem(struct bnx2 *bp) bp->status_stats_size = status_blk_size + sizeof(struct statistics_block); - status_blk = dma_alloc_coherent(&bp->pdev->dev, bp->status_stats_size, - &bp->status_blk_mapping, - GFP_KERNEL | __GFP_ZERO); + status_blk = dma_zalloc_coherent(&bp->pdev->dev, bp->status_stats_size, + &bp->status_blk_mapping, GFP_KERNEL); if (status_blk == NULL) goto alloc_mem_err; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 12202f81735c..3e77a1b1a44a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2069,9 +2069,8 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, bool is_pf); -#define BNX2X_ILT_ZALLOC(x, y, size) \ - x = dma_alloc_coherent(&bp->pdev->dev, size, y, \ - GFP_KERNEL | __GFP_ZERO) +#define BNX2X_ILT_ZALLOC(x, y, size) \ + x = dma_zalloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL) #define BNX2X_ILT_FREE(x, y, size) \ do { \ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 38be494ffa6e..affb7646241e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -51,8 +51,7 @@ extern int int_mode; #define BNX2X_PCI_ALLOC(x, y, size) \ do { \ - x = dma_alloc_coherent(&bp->pdev->dev, size, y, \ - GFP_KERNEL | __GFP_ZERO); \ + x = dma_zalloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \ if (x == NULL) \ goto alloc_mem_err; \ DP(NETIF_MSG_HW, "BNX2X_PCI_ALLOC: Physical %Lx Virtual %p\n", \ diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 95b8995187d7..2e55ee29cf13 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8591,10 +8591,10 @@ static int tg3_mem_rx_acquire(struct tg3 *tp) if (!i && tg3_flag(tp, ENABLE_RSS)) continue; - tnapi->rx_rcb = dma_alloc_coherent(&tp->pdev->dev, - TG3_RX_RCB_RING_BYTES(tp), - &tnapi->rx_rcb_mapping, - GFP_KERNEL | __GFP_ZERO); + tnapi->rx_rcb = dma_zalloc_coherent(&tp->pdev->dev, + TG3_RX_RCB_RING_BYTES(tp), + &tnapi->rx_rcb_mapping, + GFP_KERNEL); if (!tnapi->rx_rcb) goto err_out; } @@ -8643,10 +8643,9 @@ static int tg3_alloc_consistent(struct tg3 *tp) { int i; - tp->hw_stats = dma_alloc_coherent(&tp->pdev->dev, - sizeof(struct tg3_hw_stats), - &tp->stats_mapping, - GFP_KERNEL | __GFP_ZERO); + tp->hw_stats = dma_zalloc_coherent(&tp->pdev->dev, + sizeof(struct tg3_hw_stats), + &tp->stats_mapping, GFP_KERNEL); if (!tp->hw_stats) goto err_out; @@ -8654,10 +8653,10 @@ static int tg3_alloc_consistent(struct tg3 *tp) struct tg3_napi *tnapi = &tp->napi[i]; struct tg3_hw_status *sblk; - tnapi->hw_status = dma_alloc_coherent(&tp->pdev->dev, - TG3_HW_STATUS_SIZE, - &tnapi->status_mapping, - GFP_KERNEL | __GFP_ZERO); + tnapi->hw_status = dma_zalloc_coherent(&tp->pdev->dev, + TG3_HW_STATUS_SIZE, + &tnapi->status_mapping, + GFP_KERNEL); if (!tnapi->hw_status) goto err_out; diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c index bb5d63fb2e6d..ce75de9bae9e 100644 --- a/drivers/net/ethernet/cadence/at91_ether.c +++ b/drivers/net/ethernet/cadence/at91_ether.c @@ -304,7 +304,7 @@ MODULE_DEVICE_TABLE(of, at91ether_dt_ids); /* Detect MAC & PHY and perform ethernet interface initialization */ static int __init at91ether_probe(struct platform_device *pdev) { - struct macb_platform_data *board_data = pdev->dev.platform_data; + struct macb_platform_data *board_data = dev_get_platdata(&pdev->dev); struct resource *regs; struct net_device *dev; struct phy_device *phydev; diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index fe06ab0f7375..92578690f6de 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -125,7 +125,7 @@ void macb_get_hwaddr(struct macb *bp) u8 addr[6]; int i; - pdata = bp->pdev->dev.platform_data; + pdata = dev_get_platdata(&bp->pdev->dev); /* Check all 4 address register for vaild address */ for (i = 0; i < 4; i++) { @@ -276,7 +276,7 @@ static int macb_mii_probe(struct net_device *dev) phydev = phy_find_first(bp->mii_bus); if (!phydev) { netdev_err(dev, "no PHY found\n"); - return -1; + return -ENXIO; } pdata = dev_get_platdata(&bp->pdev->dev); @@ -335,7 +335,7 @@ int macb_mii_init(struct macb *bp) bp->pdev->name, bp->pdev->id); bp->mii_bus->priv = bp; bp->mii_bus->parent = &bp->dev->dev; - pdata = bp->pdev->dev.platform_data; + pdata = dev_get_platdata(&bp->pdev->dev); bp->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); if (!bp->mii_bus->irq) { @@ -379,9 +379,9 @@ int macb_mii_init(struct macb *bp) if (err) goto err_out_free_mdio_irq; - if (macb_mii_probe(bp->dev) != 0) { + err = macb_mii_probe(bp->dev); + if (err) goto err_out_unregister_bus; - } return 0; @@ -1851,7 +1851,7 @@ static int __init macb_probe(struct platform_device *pdev) err = of_get_phy_mode(pdev->dev.of_node); if (err < 0) { - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (pdata && pdata->is_rmii) bp->phy_interface = PHY_INTERFACE_MODE_RMII; else diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index e3d4ec836f8b..ec88de4ac162 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -814,7 +814,7 @@ static int ep93xx_eth_probe(struct platform_device *pdev) if (pdev == NULL) return -ENODEV; - data = pdev->dev.platform_data; + data = dev_get_platdata(&pdev->dev); mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq = platform_get_irq(pdev, 0); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index a13b312b50f2..5f5896e522d2 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1384,7 +1384,7 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) static int dm9000_probe(struct platform_device *pdev) { - struct dm9000_plat_data *pdata = pdev->dev.platform_data; + struct dm9000_plat_data *pdata = dev_get_platdata(&pdev->dev); struct board_info *db; /* Point a board information structure */ struct net_device *ndev; const unsigned char *mac_src; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 50116f8ed576..39e0a7697a81 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -145,8 +145,8 @@ static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q, q->len = len; q->entry_size = entry_size; mem->size = len * entry_size; - mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma, - GFP_KERNEL | __GFP_ZERO); + mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma, + GFP_KERNEL); if (!mem->va) return -ENOMEM; return 0; @@ -2642,8 +2642,8 @@ static int be_setup_wol(struct be_adapter *adapter, bool enable) memset(mac, 0, ETH_ALEN); cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config); - cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, - GFP_KERNEL | __GFP_ZERO); + cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, + GFP_KERNEL); if (cmd.va == NULL) return -1; @@ -3946,9 +3946,9 @@ static int be_ctrl_init(struct be_adapter *adapter) memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox)); rx_filter->size = sizeof(struct be_cmd_req_rx_filter); - rx_filter->va = dma_alloc_coherent(&adapter->pdev->dev, rx_filter->size, - &rx_filter->dma, - GFP_KERNEL | __GFP_ZERO); + rx_filter->va = dma_zalloc_coherent(&adapter->pdev->dev, + rx_filter->size, &rx_filter->dma, + GFP_KERNEL); if (rx_filter->va == NULL) { status = -ENOMEM; goto free_mbox; @@ -3994,8 +3994,8 @@ static int be_stats_init(struct be_adapter *adapter) /* BE3 and Skyhawk */ cmd->size = sizeof(struct be_cmd_req_get_stats_v1); - cmd->va = dma_alloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma, - GFP_KERNEL | __GFP_ZERO); + cmd->va = dma_zalloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma, + GFP_KERNEL); if (cmd->va == NULL) return -1; return 0; diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index cf579fb39bc5..4de8cfd149cf 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1030,8 +1030,8 @@ static int ethoc_probe(struct platform_device *pdev) } /* Allow the platform setup code to pass in a MAC address. */ - if (pdev->dev.platform_data) { - struct ethoc_platform_data *pdata = pdev->dev.platform_data; + if (dev_get_platdata(&pdev->dev)) { + struct ethoc_platform_data *pdata = dev_get_platdata(&pdev->dev); memcpy(netdev->dev_addr, pdata->hwaddr, IFHWADDRLEN); priv->phy_id = pdata->phy_id; } else { diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 934e1ae279f0..212f44b3a773 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -778,10 +778,9 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) { int i; - priv->descs = dma_alloc_coherent(priv->dev, - sizeof(struct ftgmac100_descs), - &priv->descs_dma_addr, - GFP_KERNEL | __GFP_ZERO); + priv->descs = dma_zalloc_coherent(priv->dev, + sizeof(struct ftgmac100_descs), + &priv->descs_dma_addr, GFP_KERNEL); if (!priv->descs) return -ENOMEM; diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 4658f4cc1969..8be5b40c0a12 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -732,10 +732,10 @@ static int ftmac100_alloc_buffers(struct ftmac100 *priv) { int i; - priv->descs = dma_alloc_coherent(priv->dev, - sizeof(struct ftmac100_descs), - &priv->descs_dma_addr, - GFP_KERNEL | __GFP_ZERO); + priv->descs = dma_zalloc_coherent(priv->dev, + sizeof(struct ftmac100_descs), + &priv->descs_dma_addr, + GFP_KERNEL); if (!priv->descs) return -ENOMEM; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index fdf9307ba9e6..0cd5e4b8b545 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -69,7 +69,6 @@ static void set_multicast_list(struct net_device *ndev); #endif #define DRIVER_NAME "fec" -#define FEC_NAPI_WEIGHT 64 /* Pause frame feild and FIFO threshold */ #define FEC_ENET_FCE (1 << 5) @@ -1060,7 +1059,7 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget) static void fec_get_mac(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_platform_data *pdata = fep->pdev->dev.platform_data; + struct fec_platform_data *pdata = dev_get_platdata(&fep->pdev->dev); unsigned char *iap, tmpaddr[ETH_ALEN]; /* @@ -1100,10 +1099,10 @@ static void fec_get_mac(struct net_device *ndev) * 4) FEC mac registers set by bootloader */ if (!is_valid_ether_addr(iap)) { - *((unsigned long *) &tmpaddr[0]) = - be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW)); - *((unsigned short *) &tmpaddr[4]) = - be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16); + *((__be32 *) &tmpaddr[0]) = + cpu_to_be32(readl(fep->hwp + FEC_ADDR_LOW)); + *((__be16 *) &tmpaddr[4]) = + cpu_to_be16(readl(fep->hwp + FEC_ADDR_HIGH) >> 16); iap = &tmpaddr[0]; } @@ -1981,7 +1980,7 @@ static int fec_enet_init(struct net_device *ndev) ndev->ethtool_ops = &fec_enet_ethtool_ops; writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK); - netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, FEC_NAPI_WEIGHT); + netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, NAPI_POLL_WEIGHT); if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN) { /* enable hw VLAN support */ @@ -2089,7 +2088,7 @@ fec_probe(struct platform_device *pdev) ret = of_get_phy_mode(pdev->dev.of_node); if (ret < 0) { - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (pdata) fep->phy_interface = pdata->phy; else diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 856ea66c9223..dac564c25440 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -637,8 +637,8 @@ static int mal_probe(struct platform_device *ofdev) bd_size = sizeof(struct mal_descriptor) * (NUM_TX_BUFF * mal->num_tx_chans + NUM_RX_BUFF * mal->num_rx_chans); - mal->bd_virt = dma_alloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma, - GFP_KERNEL | __GFP_ZERO); + mal->bd_virt = dma_zalloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma, + GFP_KERNEL); if (mal->bd_virt == NULL) { err = -ENOMEM; goto fail_unmap; diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 82a967c95598..73a8aeefb92a 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1019,8 +1019,8 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) txdr->size = txdr->count * sizeof(struct e1000_tx_desc); txdr->size = ALIGN(txdr->size, 4096); - txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma, - GFP_KERNEL | __GFP_ZERO); + txdr->desc = dma_zalloc_coherent(&pdev->dev, txdr->size, &txdr->dma, + GFP_KERNEL); if (!txdr->desc) { ret_val = 2; goto err_nomem; @@ -1077,8 +1077,8 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) } rxdr->size = rxdr->count * sizeof(struct e1000_rx_desc); - rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, - GFP_KERNEL | __GFP_ZERO); + rxdr->desc = dma_zalloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, + GFP_KERNEL); if (!rxdr->desc) { ret_val = 6; goto err_nomem; diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index fce3e92f9d11..9f6b236828e6 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -718,8 +718,8 @@ ixgb_setup_tx_resources(struct ixgb_adapter *adapter) txdr->size = txdr->count * sizeof(struct ixgb_tx_desc); txdr->size = ALIGN(txdr->size, 4096); - txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma, - GFP_KERNEL | __GFP_ZERO); + txdr->desc = dma_zalloc_coherent(&pdev->dev, txdr->size, &txdr->dma, + GFP_KERNEL); if (!txdr->desc) { vfree(txdr->buffer_info); return -ENOMEM; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 207f68fbe3d3..007a0083a636 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -49,6 +49,7 @@ static s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, static s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw, ixgbe_link_speed speed, bool autoneg_wait_to_complete); +static void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw); static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, bool autoneg_wait_to_complete); static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, @@ -141,11 +142,13 @@ static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) goto setup_sfp_out; } - hw->eeprom.ops.read(hw, ++data_offset, &data_value); + if (hw->eeprom.ops.read(hw, ++data_offset, &data_value)) + goto setup_sfp_err; while (data_value != 0xffff) { IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value); IXGBE_WRITE_FLUSH(hw); - hw->eeprom.ops.read(hw, ++data_offset, &data_value); + if (hw->eeprom.ops.read(hw, ++data_offset, &data_value)) + goto setup_sfp_err; } /* Release the semaphore */ @@ -191,6 +194,17 @@ static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) setup_sfp_out: return ret_val; + +setup_sfp_err: + /* Release the semaphore */ + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); + /* Delay obtaining semaphore again to allow FW access, + * semaphore_delay is in ms usleep_range needs us. + */ + usleep_range(hw->eeprom.semaphore_delay * 1000, + hw->eeprom.semaphore_delay * 2000); + hw_err(hw, "eeprom read at offset %d failed\n", data_offset); + return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; } static s32 ixgbe_get_invariants_82599(struct ixgbe_hw *hw) @@ -365,8 +379,13 @@ static s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, if (hw->phy.multispeed_fiber) { *speed |= IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - *autoneg = true; + IXGBE_LINK_SPEED_1GB_FULL; + + /* QSFP must not enable auto-negotiation */ + if (hw->phy.media_type == ixgbe_media_type_fiber_qsfp) + *autoneg = false; + else + *autoneg = true; } out: @@ -432,6 +451,24 @@ out: } /** + * ixgbe_stop_mac_link_on_d3_82599 - Disables link on D3 + * @hw: pointer to hardware structure + * + * Disables link, should be called during D3 power down sequence. + * + */ +static void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw) +{ + u32 autoc2_reg; + + if (!hw->mng_fw_enabled && !hw->wol_enabled) { + autoc2_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + autoc2_reg |= IXGBE_AUTOC2_LINK_DISABLE_ON_D3_MASK; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2_reg); + } +} + +/** * ixgbe_start_mac_link_82599 - Setup MAC link settings * @hw: pointer to hardware structure * @autoneg_wait_to_complete: true when waiting for completion is needed @@ -668,13 +705,18 @@ static s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, goto out; /* Set the module link speed */ - if (hw->phy.media_type == ixgbe_media_type_fiber_fixed) { - ixgbe_set_fiber_fixed_speed(hw, - IXGBE_LINK_SPEED_10GB_FULL); - } else { + switch (hw->phy.media_type) { + case ixgbe_media_type_fiber: esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5); IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); IXGBE_WRITE_FLUSH(hw); + break; + case ixgbe_media_type_fiber_qsfp: + /* QSFP module automatically detects MAC link speed */ + break; + default: + hw_dbg(hw, "Unexpected media type.\n"); + break; } /* Allow module to change analog characteristics (1G->10G) */ @@ -725,14 +767,23 @@ static s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, goto out; /* Set the module link speed */ - if (hw->phy.media_type == ixgbe_media_type_fiber_fixed) { + switch (hw->phy.media_type) { + case ixgbe_media_type_fiber_fixed: ixgbe_set_fiber_fixed_speed(hw, IXGBE_LINK_SPEED_1GB_FULL); - } else { + break; + case ixgbe_media_type_fiber: esdp_reg &= ~IXGBE_ESDP_SDP5; esdp_reg |= IXGBE_ESDP_SDP5_DIR; IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); IXGBE_WRITE_FLUSH(hw); + break; + case ixgbe_media_type_fiber_qsfp: + /* QSFP module automatically detects MAC link speed */ + break; + default: + hw_dbg(hw, "Unexpected media type.\n"); + break; } /* Allow module to change analog characteristics (10G->1G) */ @@ -2161,6 +2212,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) { s32 status = IXGBE_ERR_EEPROM_VERSION; u16 fw_offset, fw_ptp_cfg_offset; + u16 offset; u16 fw_version = 0; /* firmware check is only necessary for SFI devices */ @@ -2170,29 +2222,35 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) } /* get the offset to the Firmware Module block */ - hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset); + offset = IXGBE_FW_PTR; + if (hw->eeprom.ops.read(hw, offset, &fw_offset)) + goto fw_version_err; if ((fw_offset == 0) || (fw_offset == 0xFFFF)) goto fw_version_out; /* get the offset to the Pass Through Patch Configuration block */ - hw->eeprom.ops.read(hw, (fw_offset + - IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR), - &fw_ptp_cfg_offset); + offset = fw_offset + IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR; + if (hw->eeprom.ops.read(hw, offset, &fw_ptp_cfg_offset)) + goto fw_version_err; if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF)) goto fw_version_out; /* get the firmware version */ - hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset + - IXGBE_FW_PATCH_VERSION_4), - &fw_version); + offset = fw_ptp_cfg_offset + IXGBE_FW_PATCH_VERSION_4; + if (hw->eeprom.ops.read(hw, offset, &fw_version)) + goto fw_version_err; if (fw_version > 0x5) status = 0; fw_version_out: return status; + +fw_version_err: + hw_err(hw, "eeprom read at offset %d failed\n", offset); + return IXGBE_ERR_EEPROM_VERSION; } /** @@ -2477,6 +2535,7 @@ static struct ixgbe_mac_operations mac_ops_82599 = { .set_lan_id = &ixgbe_set_lan_id_multi_port_pcie, .read_analog_reg8 = &ixgbe_read_analog_reg8_82599, .write_analog_reg8 = &ixgbe_write_analog_reg8_82599, + .stop_link_on_d3 = &ixgbe_stop_mac_link_on_d3_82599, .setup_link = &ixgbe_setup_mac_link_82599, .set_rxpba = &ixgbe_set_rxpba_generic, .check_link = &ixgbe_check_mac_link_generic, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 50e62a2b1a65..b5c434b617b1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -2740,13 +2740,19 @@ out: static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw, u16 *san_mac_offset) { + s32 ret_val; + /* * First read the EEPROM pointer to see if the MAC addresses are * available. */ - hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR, san_mac_offset); + ret_val = hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR, + san_mac_offset); + if (ret_val) + hw_err(hw, "eeprom read at offset %d failed\n", + IXGBE_SAN_MAC_ADDR_PTR); - return 0; + return ret_val; } /** @@ -2763,23 +2769,16 @@ s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr) { u16 san_mac_data, san_mac_offset; u8 i; + s32 ret_val; /* * First read the EEPROM pointer to see if the MAC addresses are * available. If they're not, no point in calling set_lan_id() here. */ - ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset); + ret_val = ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset); + if (ret_val || san_mac_offset == 0 || san_mac_offset == 0xFFFF) - if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) { - /* - * No addresses available in this EEPROM. It's not an - * error though, so just wipe the local address and return. - */ - for (i = 0; i < 6; i++) - san_mac_addr[i] = 0xFF; - - goto san_mac_addr_out; - } + goto san_mac_addr_clr; /* make sure we know which port we need to program */ hw->mac.ops.set_lan_id(hw); @@ -2787,14 +2786,26 @@ s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr) (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) : (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET); for (i = 0; i < 3; i++) { - hw->eeprom.ops.read(hw, san_mac_offset, &san_mac_data); + ret_val = hw->eeprom.ops.read(hw, san_mac_offset, + &san_mac_data); + if (ret_val) { + hw_err(hw, "eeprom read at offset %d failed\n", + san_mac_offset); + goto san_mac_addr_clr; + } san_mac_addr[i * 2] = (u8)(san_mac_data); san_mac_addr[i * 2 + 1] = (u8)(san_mac_data >> 8); san_mac_offset++; } - -san_mac_addr_out: return 0; + +san_mac_addr_clr: + /* No addresses available in this EEPROM. It's not necessarily an + * error though, so just wipe the local address and return. + */ + for (i = 0; i < 6; i++) + san_mac_addr[i] = 0xFF; + return ret_val; } /** @@ -3243,8 +3254,9 @@ s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix, *wwpn_prefix = 0xFFFF; /* check if alternative SAN MAC is supported */ - hw->eeprom.ops.read(hw, IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR, - &alt_san_mac_blk_offset); + offset = IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR; + if (hw->eeprom.ops.read(hw, offset, &alt_san_mac_blk_offset)) + goto wwn_prefix_err; if ((alt_san_mac_blk_offset == 0) || (alt_san_mac_blk_offset == 0xFFFF)) @@ -3252,19 +3264,26 @@ s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix, /* check capability in alternative san mac address block */ offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET; - hw->eeprom.ops.read(hw, offset, &caps); + if (hw->eeprom.ops.read(hw, offset, &caps)) + goto wwn_prefix_err; if (!(caps & IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN)) goto wwn_prefix_out; /* get the corresponding prefix for WWNN/WWPN */ offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET; - hw->eeprom.ops.read(hw, offset, wwnn_prefix); + if (hw->eeprom.ops.read(hw, offset, wwnn_prefix)) + hw_err(hw, "eeprom read at offset %d failed\n", offset); offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET; - hw->eeprom.ops.read(hw, offset, wwpn_prefix); + if (hw->eeprom.ops.read(hw, offset, wwpn_prefix)) + goto wwn_prefix_err; wwn_prefix_out: return 0; + +wwn_prefix_err: + hw_err(hw, "eeprom read at offset %d failed\n", offset); + return 0; } /** @@ -3778,7 +3797,11 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) u8 sensor_index; u8 sensor_location; - hw->eeprom.ops.read(hw, (ets_offset + 1 + i), &ets_sensor); + if (hw->eeprom.ops.read(hw, ets_offset + 1 + i, &ets_sensor)) { + hw_err(hw, "eeprom read at offset %d failed\n", + ets_offset + 1 + i); + continue; + } sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> IXGBE_ETS_DATA_INDEX_SHIFT); sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index 1315b8ac7f58..d259dc76604e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -143,8 +143,12 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw); #define IXGBE_WRITE_FLUSH(a) IXGBE_READ_REG(a, IXGBE_STATUS) +#define ixgbe_hw_to_netdev(hw) (((struct ixgbe_adapter *)(hw)->back)->netdev) + #define hw_dbg(hw, format, arg...) \ - netdev_dbg(((struct ixgbe_adapter *)(hw->back))->netdev, format, ##arg) + netdev_dbg(ixgbe_hw_to_netdev(hw), format, ## arg) +#define hw_err(hw, format, arg...) \ + netdev_err(ixgbe_hw_to_netdev(hw), format, ## arg) #define e_dev_info(format, arg...) \ dev_info(&adapter->pdev->dev, format, ## arg) #define e_dev_warn(format, arg...) \ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 50c1e9b2fd80..0e1b973659b0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -311,9 +311,6 @@ static int ixgbe_set_settings(struct net_device *netdev, * this function does not support duplex forcing, but can * limit the advertising of the adapter to the specified speed */ - if (ecmd->autoneg == AUTONEG_DISABLE) - return -EINVAL; - if (ecmd->advertising & ~ecmd->supported) return -EINVAL; @@ -1049,7 +1046,7 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, data[i] = (ixgbe_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } - for (j = 0; j < IXGBE_NUM_RX_QUEUES; j++) { + for (j = 0; j < netdev->num_tx_queues; j++) { ring = adapter->tx_ring[j]; if (!ring) { data[i] = 0; @@ -1885,11 +1882,12 @@ static void ixgbe_diag_test(struct net_device *netdev, struct ethtool_test *eth_test, u64 *data) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; bool if_running = netif_running(netdev); set_bit(__IXGBE_TESTING, &adapter->state); if (eth_test->flags == ETH_TEST_FL_OFFLINE) { + struct ixgbe_hw *hw = &adapter->hw; + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { int i; for (i = 0; i < adapter->num_vfs; i++) { @@ -1913,21 +1911,18 @@ static void ixgbe_diag_test(struct net_device *netdev, /* Offline tests */ e_info(hw, "offline testing starting\n"); - if (if_running) - /* indicate we're in test mode */ - dev_close(netdev); - - /* bringing adapter down disables SFP+ optics */ - if (hw->mac.ops.enable_tx_laser) - hw->mac.ops.enable_tx_laser(hw); - /* Link test performed before hardware reset so autoneg doesn't * interfere with test result */ if (ixgbe_link_test(adapter, &data[4])) eth_test->flags |= ETH_TEST_FL_FAILED; - ixgbe_reset(adapter); + if (if_running) + /* indicate we're in test mode */ + dev_close(netdev); + else + ixgbe_reset(adapter); + e_info(hw, "register testing starting\n"); if (ixgbe_reg_test(adapter, &data[0])) eth_test->flags |= ETH_TEST_FL_FAILED; @@ -1964,13 +1959,11 @@ skip_loopback: clear_bit(__IXGBE_TESTING, &adapter->state); if (if_running) dev_open(netdev); + else if (hw->mac.ops.disable_tx_laser) + hw->mac.ops.disable_tx_laser(hw); } else { e_info(hw, "online testing starting\n"); - /* if adapter is down, SFP+ optics will be disabled */ - if (!if_running && hw->mac.ops.enable_tx_laser) - hw->mac.ops.enable_tx_laser(hw); - /* Online tests */ if (ixgbe_link_test(adapter, &data[4])) eth_test->flags |= ETH_TEST_FL_FAILED; @@ -1984,9 +1977,6 @@ skip_loopback: clear_bit(__IXGBE_TESTING, &adapter->state); } - /* if adapter was down, ensure SFP+ optics are disabled again */ - if (!if_running && hw->mac.ops.disable_tx_laser) - hw->mac.ops.disable_tx_laser(hw); skip_ol_tests: msleep_interruptible(4 * 1000); } @@ -2953,28 +2943,27 @@ static int ixgbe_get_module_eeprom(struct net_device *dev, u32 status = IXGBE_ERR_PHY_ADDR_INVALID; u8 databyte = 0xFF; int i = 0; - int ret_val = 0; if (ee->len == 0) return -EINVAL; - for (i = ee->offset; i < ee->len; i++) { + for (i = ee->offset; i < ee->offset + ee->len; i++) { /* I2C reads can take long time */ if (test_bit(__IXGBE_IN_SFP_INIT, &adapter->state)) return -EBUSY; if (i < ETH_MODULE_SFF_8079_LEN) - status = hw->phy.ops.read_i2c_eeprom(hw, i, &databyte); + status = hw->phy.ops.read_i2c_eeprom(hw, i, &databyte); else status = hw->phy.ops.read_i2c_sff8472(hw, i, &databyte); if (status != 0) - ret_val = -EIO; + return -EIO; data[i - ee->offset] = databyte; } - return ret_val; + return 0; } static const struct ethtool_ops ixgbe_ethtool_ops = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 128d6b885326..7aba452833e5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4175,6 +4175,10 @@ static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw) case ixgbe_phy_sfp_passive_unknown: case ixgbe_phy_sfp_active_unknown: case ixgbe_phy_sfp_ftl_active: + case ixgbe_phy_qsfp_passive_unknown: + case ixgbe_phy_qsfp_active_unknown: + case ixgbe_phy_qsfp_intel: + case ixgbe_phy_qsfp_unknown: return true; case ixgbe_phy_nl: if (hw->mac.type == ixgbe_mac_82598EB) @@ -5292,6 +5296,9 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) return retval; #endif + if (hw->mac.ops.stop_link_on_d3) + hw->mac.ops.stop_link_on_d3(hw); + if (wufc) { ixgbe_set_rx_mode(netdev); @@ -7602,10 +7609,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->mac.type == ixgbe_mac_82598EB) { err = 0; } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - e_dev_err("failed to load because an unsupported SFP+ " - "module type was detected.\n"); - e_dev_err("Reload the driver after installing a supported " - "module.\n"); + e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n"); + e_dev_err("Reload the driver after installing a supported module.\n"); goto err_sw_init; } else if (err) { e_dev_err("HW Init failed: %d\n", err); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 369eef526bc1..e4c676006be9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -791,6 +791,8 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) * Read control word from PHY init contents offset */ ret_val = hw->eeprom.ops.read(hw, data_offset, &eword); + if (ret_val) + goto err_eeprom; control = (eword & IXGBE_CONTROL_MASK_NL) >> IXGBE_CONTROL_SHIFT_NL; edata = eword & IXGBE_DATA_MASK_NL; @@ -803,10 +805,15 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) case IXGBE_DATA_NL: hw_dbg(hw, "DATA:\n"); data_offset++; - hw->eeprom.ops.read(hw, data_offset++, - &phy_offset); + ret_val = hw->eeprom.ops.read(hw, data_offset++, + &phy_offset); + if (ret_val) + goto err_eeprom; for (i = 0; i < edata; i++) { - hw->eeprom.ops.read(hw, data_offset, &eword); + ret_val = hw->eeprom.ops.read(hw, data_offset, + &eword); + if (ret_val) + goto err_eeprom; hw->phy.ops.write_reg(hw, phy_offset, MDIO_MMD_PMAPMD, eword); hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword, @@ -838,6 +845,10 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) out: return ret_val; + +err_eeprom: + hw_err(hw, "eeprom read at offset %d failed\n", data_offset); + return IXGBE_ERR_PHY; } /** @@ -1164,6 +1175,10 @@ s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) u8 comp_codes_10g = 0; u8 oui_bytes[3] = {0, 0, 0}; u16 enforce_sfp = 0; + u8 connector = 0; + u8 cable_length = 0; + u8 device_tech = 0; + bool active_cable = false; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) { hw->phy.sfp_type = ixgbe_sfp_type_not_present; @@ -1194,18 +1209,18 @@ s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) if (status != 0) goto err_read_i2c_eeprom; + status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_QSFP_1GBE_COMP, + &comp_codes_1g); + + if (status != 0) + goto err_read_i2c_eeprom; + if (comp_codes_10g & IXGBE_SFF_QSFP_DA_PASSIVE_CABLE) { hw->phy.type = ixgbe_phy_qsfp_passive_unknown; if (hw->bus.lan_id == 0) hw->phy.sfp_type = ixgbe_sfp_type_da_cu_core0; else hw->phy.sfp_type = ixgbe_sfp_type_da_cu_core1; - } else if (comp_codes_10g & IXGBE_SFF_QSFP_DA_ACTIVE_CABLE) { - hw->phy.type = ixgbe_phy_qsfp_active_unknown; - if (hw->bus.lan_id == 0) - hw->phy.sfp_type = ixgbe_sfp_type_da_act_lmt_core0; - else - hw->phy.sfp_type = ixgbe_sfp_type_da_act_lmt_core1; } else if (comp_codes_10g & (IXGBE_SFF_10GBASESR_CAPABLE | IXGBE_SFF_10GBASELR_CAPABLE)) { if (hw->bus.lan_id == 0) @@ -1213,10 +1228,47 @@ s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) else hw->phy.sfp_type = ixgbe_sfp_type_srlr_core1; } else { - /* unsupported module type */ - hw->phy.type = ixgbe_phy_sfp_unsupported; - status = IXGBE_ERR_SFP_NOT_SUPPORTED; - goto out; + if (comp_codes_10g & IXGBE_SFF_QSFP_DA_ACTIVE_CABLE) + active_cable = true; + + if (!active_cable) { + /* check for active DA cables that pre-date + * SFF-8436 v3.6 + */ + hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_QSFP_CONNECTOR, + &connector); + + hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_QSFP_CABLE_LENGTH, + &cable_length); + + hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_QSFP_DEVICE_TECH, + &device_tech); + + if ((connector == + IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE) && + (cable_length > 0) && + ((device_tech >> 4) == + IXGBE_SFF_QSFP_TRANSMITER_850NM_VCSEL)) + active_cable = true; + } + + if (active_cable) { + hw->phy.type = ixgbe_phy_qsfp_active_unknown; + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_da_act_lmt_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_da_act_lmt_core1; + } else { + /* unsupported module type */ + hw->phy.type = ixgbe_phy_sfp_unsupported; + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + goto out; + } } if (hw->phy.sfp_type != stored_sfp_type) @@ -1271,7 +1323,7 @@ s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) status = 0; } else { if (hw->allow_unsupported_sfp == true) { - e_warn(hw, "WARNING: Intel (R) Network Connections are quality tested using Intel (R) Ethernet Optics. Using untested modules is not supported and may cause unstable operation or damage to the module or the adapter. Intel Corporation is not responsible for any harm caused by using untested modules.\n"); + e_warn(drv, "WARNING: Intel (R) Network Connections are quality tested using Intel (R) Ethernet Optics. Using untested modules is not supported and may cause unstable operation or damage to the module or the adapter. Intel Corporation is not responsible for any harm caused by using untested modules.\n"); status = 0; } else { hw_dbg(hw, @@ -1339,7 +1391,11 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, sfp_type = ixgbe_sfp_type_srlr_core1; /* Read offset to PHY init contents */ - hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset); + if (hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset)) { + hw_err(hw, "eeprom read at %d failed\n", + IXGBE_PHY_INIT_OFFSET_NL); + return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; + } if ((!*list_offset) || (*list_offset == 0xFFFF)) return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; @@ -1351,12 +1407,14 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, * Find the matching SFP ID in the EEPROM * and program the init sequence */ - hw->eeprom.ops.read(hw, *list_offset, &sfp_id); + if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id)) + goto err_phy; while (sfp_id != IXGBE_PHY_INIT_END_NL) { if (sfp_id == sfp_type) { (*list_offset)++; - hw->eeprom.ops.read(hw, *list_offset, data_offset); + if (hw->eeprom.ops.read(hw, *list_offset, data_offset)) + goto err_phy; if ((!*data_offset) || (*data_offset == 0xFFFF)) { hw_dbg(hw, "SFP+ module not supported\n"); return IXGBE_ERR_SFP_NOT_SUPPORTED; @@ -1366,7 +1424,7 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, } else { (*list_offset) += 2; if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id)) - return IXGBE_ERR_PHY; + goto err_phy; } } @@ -1376,6 +1434,10 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, } return 0; + +err_phy: + hw_err(hw, "eeprom read at offset %d failed\n", *list_offset); + return IXGBE_ERR_PHY; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index 138dadd7cf33..24af12e3719e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -50,8 +50,11 @@ #define IXGBE_SFF_QSFP_VENDOR_OUI_BYTE0 0xA5 #define IXGBE_SFF_QSFP_VENDOR_OUI_BYTE1 0xA6 #define IXGBE_SFF_QSFP_VENDOR_OUI_BYTE2 0xA7 +#define IXGBE_SFF_QSFP_CONNECTOR 0x82 #define IXGBE_SFF_QSFP_10GBE_COMP 0x83 #define IXGBE_SFF_QSFP_1GBE_COMP 0x86 +#define IXGBE_SFF_QSFP_CABLE_LENGTH 0x92 +#define IXGBE_SFF_QSFP_DEVICE_TECH 0x93 /* Bitmasks */ #define IXGBE_SFF_DA_PASSIVE_CABLE 0x4 @@ -68,6 +71,8 @@ #define IXGBE_SFF_ADDRESSING_MODE 0x4 #define IXGBE_SFF_QSFP_DA_ACTIVE_CABLE 0x1 #define IXGBE_SFF_QSFP_DA_PASSIVE_CABLE 0x8 +#define IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE 0x23 +#define IXGBE_SFF_QSFP_TRANSMITER_850NM_VCSEL 0x0 #define IXGBE_I2C_EEPROM_READ_MASK 0x100 #define IXGBE_I2C_EEPROM_STATUS_MASK 0x3 #define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION 0x0 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 73c8e73bb6e7..276d7b135332 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -639,8 +639,8 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses; - u32 reg, msgbuf[4]; - u32 reg_offset, vf_shift; + u32 reg, reg_offset, vf_shift; + u32 msgbuf[4] = {0, 0, 0, 0}; u8 *addr = (u8 *)(&msgbuf[1]); e_info(probe, "VF Reset msg received from vf %d\n", vf); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 161ff18be775..6442cf8f9dce 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1596,6 +1596,7 @@ enum { #define IXGBE_AUTOC2_10G_KR (0x0 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) #define IXGBE_AUTOC2_10G_XFI (0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) #define IXGBE_AUTOC2_10G_SFI (0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) +#define IXGBE_AUTOC2_LINK_DISABLE_ON_D3_MASK 0x50000000 #define IXGBE_AUTOC2_LINK_DISABLE_MASK 0x70000000 #define IXGBE_MACC_FLU 0x00000001 @@ -2847,6 +2848,7 @@ struct ixgbe_mac_operations { void (*disable_tx_laser)(struct ixgbe_hw *); void (*enable_tx_laser)(struct ixgbe_hw *); void (*flap_tx_laser)(struct ixgbe_hw *); + void (*stop_link_on_d3)(struct ixgbe_hw *); s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool); s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool); s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *, diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index c35db735958f..7fb5677451f9 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2641,7 +2641,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) ret = mv643xx_eth_shared_of_probe(pdev); if (ret) return ret; - pd = pdev->dev.platform_data; + pd = dev_get_platdata(&pdev->dev); msp->tx_csum_limit = (pd != NULL && pd->tx_csum_limit) ? pd->tx_csum_limit : 9 * 1024; @@ -2833,7 +2833,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) struct resource *res; int err; - pd = pdev->dev.platform_data; + pd = dev_get_platdata(&pdev->dev); if (pd == NULL) { dev_err(&pdev->dev, "no mv643xx_eth_platform_data\n"); return -ENODEV; diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index db481477bcc5..4ae0c7426010 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -583,10 +583,9 @@ static int init_hash_table(struct pxa168_eth_private *pep) * table is full. */ if (pep->htpr == NULL) { - pep->htpr = dma_alloc_coherent(pep->dev->dev.parent, - HASH_ADDR_TABLE_SIZE, - &pep->htpr_dma, - GFP_KERNEL | __GFP_ZERO); + pep->htpr = dma_zalloc_coherent(pep->dev->dev.parent, + HASH_ADDR_TABLE_SIZE, + &pep->htpr_dma, GFP_KERNEL); if (pep->htpr == NULL) return -ENOMEM; } else { @@ -1024,9 +1023,9 @@ static int rxq_init(struct net_device *dev) pep->rx_desc_count = 0; size = pep->rx_ring_size * sizeof(struct rx_desc); pep->rx_desc_area_size = size; - pep->p_rx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, - &pep->rx_desc_dma, - GFP_KERNEL | __GFP_ZERO); + pep->p_rx_desc_area = dma_zalloc_coherent(pep->dev->dev.parent, size, + &pep->rx_desc_dma, + GFP_KERNEL); if (!pep->p_rx_desc_area) goto out; @@ -1085,9 +1084,9 @@ static int txq_init(struct net_device *dev) pep->tx_desc_count = 0; size = pep->tx_ring_size * sizeof(struct tx_desc); pep->tx_desc_area_size = size; - pep->p_tx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, - &pep->tx_desc_dma, - GFP_KERNEL | __GFP_ZERO); + pep->p_tx_desc_area = dma_zalloc_coherent(pep->dev->dev.parent, size, + &pep->tx_desc_dma, + GFP_KERNEL); if (!pep->p_tx_desc_area) goto out; /* Initialize the next_desc_ptr links in the Tx descriptors ring */ @@ -1517,7 +1516,7 @@ static int pxa168_eth_probe(struct platform_device *pdev) printk(KERN_INFO "%s:Using random mac address\n", DRIVER_NAME); eth_hw_addr_random(dev); - pep->pd = pdev->dev.platform_data; + pep->pd = dev_get_platdata(&pdev->dev); pep->rx_ring_size = NUM_RX_DESCS; if (pep->pd->rx_queue_size) pep->rx_ring_size = pep->pd->rx_queue_size; diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index 94b3bd6fb5fd..0951f7aca1ef 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -1148,7 +1148,7 @@ static int ks8842_probe(struct platform_device *pdev) struct resource *iomem; struct net_device *netdev; struct ks8842_adapter *adapter; - struct ks8842_platform_data *pdata = pdev->dev.platform_data; + struct ks8842_platform_data *pdata = dev_get_platdata(&pdev->dev); u16 id; unsigned i; diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 9f3f5dbe1d30..0fba1532d326 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -1636,7 +1636,7 @@ static int ks8851_probe(struct platform_device *pdev) } else { struct ks8851_mll_platform_data *pdata; - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (!pdata) { netdev_err(netdev, "No platform data\n"); err = -ENODEV; diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 50a1d4a04eb0..149355b52ad0 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3783,9 +3783,9 @@ static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) for (i = 0; i < mgp->num_slices; i++) { ss = &mgp->ss[i]; bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry); - ss->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes, - &ss->rx_done.bus, - GFP_KERNEL | __GFP_ZERO); + ss->rx_done.entry = dma_zalloc_coherent(&pdev->dev, bytes, + &ss->rx_done.bus, + GFP_KERNEL); if (ss->rx_done.entry == NULL) goto abort; bytes = sizeof(*ss->fw_stats); diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c index dc2c6f561e9a..235fd51100aa 100644 --- a/drivers/net/ethernet/netx-eth.c +++ b/drivers/net/ethernet/netx-eth.c @@ -390,7 +390,7 @@ static int netx_eth_drv_probe(struct platform_device *pdev) priv = netdev_priv(ndev); - pdata = (struct netxeth_platform_data *)pdev->dev.platform_data; + pdata = (struct netxeth_platform_data *)dev_get_platdata(&pdev->dev); priv->xc = request_xc(pdata->xcno, &pdev->dev); if (!priv->xc) { dev_err(&pdev->dev, "unable to request xc engine\n"); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index e19f1be60d5e..5a0f04c2c813 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1491,9 +1491,9 @@ pch_gbe_alloc_rx_buffers_pool(struct pch_gbe_adapter *adapter, bufsz = adapter->rx_buffer_len; size = rx_ring->count * bufsz + PCH_GBE_RESERVE_MEMORY; - rx_ring->rx_buff_pool = dma_alloc_coherent(&pdev->dev, size, - &rx_ring->rx_buff_pool_logic, - GFP_KERNEL | __GFP_ZERO); + rx_ring->rx_buff_pool = + dma_zalloc_coherent(&pdev->dev, size, + &rx_ring->rx_buff_pool_logic, GFP_KERNEL); if (!rx_ring->rx_buff_pool) return -ENOMEM; @@ -1807,9 +1807,8 @@ int pch_gbe_setup_tx_resources(struct pch_gbe_adapter *adapter, tx_ring->size = tx_ring->count * (int)sizeof(struct pch_gbe_tx_desc); - tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size, - &tx_ring->dma, - GFP_KERNEL | __GFP_ZERO); + tx_ring->desc = dma_zalloc_coherent(&pdev->dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) { vfree(tx_ring->buffer_info); return -ENOMEM; @@ -1852,9 +1851,8 @@ int pch_gbe_setup_rx_resources(struct pch_gbe_adapter *adapter, return -ENOMEM; rx_ring->size = rx_ring->count * (int)sizeof(struct pch_gbe_rx_desc); - rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size, - &rx_ring->dma, - GFP_KERNEL | __GFP_ZERO); + rx_ring->desc = dma_zalloc_coherent(&pdev->dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { vfree(rx_ring->buffer_info); return -ENOMEM; diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index f21ae7b6c766..c498181a9aa8 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -440,10 +440,9 @@ static int pasemi_mac_setup_rx_resources(const struct net_device *dev) if (pasemi_dma_alloc_ring(&ring->chan, RX_RING_SIZE)) goto out_ring_desc; - ring->buffers = dma_alloc_coherent(&mac->dma_pdev->dev, - RX_RING_SIZE * sizeof(u64), - &ring->buf_dma, - GFP_KERNEL | __GFP_ZERO); + ring->buffers = dma_zalloc_coherent(&mac->dma_pdev->dev, + RX_RING_SIZE * sizeof(u64), + &ring->buf_dma, GFP_KERNEL); if (!ring->buffers) goto out_ring_desc; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 22bd425cfd84..461596096383 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -38,8 +38,8 @@ #define _QLCNIC_LINUX_MAJOR 5 #define _QLCNIC_LINUX_MINOR 3 -#define _QLCNIC_LINUX_SUBVERSION 49 -#define QLCNIC_LINUX_VERSIONID "5.3.49" +#define _QLCNIC_LINUX_SUBVERSION 50 +#define QLCNIC_LINUX_VERSIONID "5.3.50" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) @@ -392,7 +392,7 @@ struct qlcnic_dump_template_hdr { struct qlcnic_fw_dump { u8 clr; /* flag to indicate if dump is cleared */ - u8 enable; /* enable/disable dump */ + bool enable; /* enable/disable dump */ u32 size; /* total size of the dump */ void *data; /* dump data area */ struct qlcnic_dump_template_hdr *tmpl_hdr; @@ -463,7 +463,7 @@ struct qlcnic_hardware_context { struct qlcnic_fdt fdt; struct qlc_83xx_reset reset; struct qlc_83xx_idc idc; - struct qlc_83xx_fw_info fw_info; + struct qlc_83xx_fw_info *fw_info; struct qlcnic_intrpt_config *intr_tbl; struct qlcnic_sriov *sriov; u32 *reg_tbl; @@ -837,6 +837,7 @@ struct qlcnic_mac_list_s { #define QLCNIC_FW_CAP2_HW_LRO_IPV6 BIT_3 #define QLCNIC_FW_CAPABILITY_SET_DRV_VER BIT_5 #define QLCNIC_FW_CAPABILITY_2_BEACON BIT_7 +#define QLCNIC_FW_CAPABILITY_2_PER_PORT_ESWITCH_CFG BIT_8 /* module types */ #define LINKEVENT_MODULE_NOT_PRESENT 1 @@ -1184,6 +1185,7 @@ struct qlcnic_pci_info { }; struct qlcnic_npar_info { + bool eswitch_status; u16 pvid; u16 min_bw; u16 max_bw; @@ -1403,7 +1405,6 @@ struct qlcnic_esw_statistics { struct __qlcnic_esw_statistics tx; }; -#define QLCNIC_DUMP_MASK_DEF 0x1f #define QLCNIC_FORCE_FW_DUMP_KEY 0xdeadfeed #define QLCNIC_ENABLE_FW_DUMP 0xaddfeed #define QLCNIC_DISABLE_FW_DUMP 0xbadfeed @@ -1478,6 +1479,12 @@ int qlcnic_wol_supported(struct qlcnic_adapter *adapter); void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter); void qlcnic_delete_lb_filters(struct qlcnic_adapter *adapter); int qlcnic_dump_fw(struct qlcnic_adapter *); +int qlcnic_enable_fw_dump_state(struct qlcnic_adapter *); +bool qlcnic_check_fw_dump_state(struct qlcnic_adapter *); +pci_ers_result_t qlcnic_82xx_io_error_detected(struct pci_dev *, + pci_channel_state_t); +pci_ers_result_t qlcnic_82xx_io_slot_reset(struct pci_dev *); +void qlcnic_82xx_io_resume(struct pci_dev *); /* Functions from qlcnic_init.c */ void qlcnic_schedule_work(struct qlcnic_adapter *, work_func_t, int); @@ -1723,6 +1730,10 @@ struct qlcnic_hardware_ops { void (*set_mac_filter_count) (struct qlcnic_adapter *); void (*free_mac_list) (struct qlcnic_adapter *); int (*read_phys_port_id) (struct qlcnic_adapter *); + pci_ers_result_t (*io_error_detected) (struct pci_dev *, + pci_channel_state_t); + pci_ers_result_t (*io_slot_reset) (struct pci_dev *); + void (*io_resume) (struct pci_dev *); }; extern struct qlcnic_nic_template qlcnic_vf_ops; @@ -2069,6 +2080,14 @@ static inline bool qlcnic_82xx_check(struct qlcnic_adapter *adapter) return (device == PCI_DEVICE_ID_QLOGIC_QLE824X) ? true : false; } +static inline bool qlcnic_84xx_check(struct qlcnic_adapter *adapter) +{ + unsigned short device = adapter->pdev->device; + + return ((device == PCI_DEVICE_ID_QLOGIC_QLE844X) || + (device == PCI_DEVICE_ID_QLOGIC_VF_QLE844X)) ? true : false; +} + static inline bool qlcnic_83xx_check(struct qlcnic_adapter *adapter) { unsigned short device = adapter->pdev->device; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 8fce1d36a79c..a1818dae47b6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -11,6 +11,7 @@ #include <linux/ipv6.h> #include <linux/ethtool.h> #include <linux/interrupt.h> +#include <linux/aer.h> #define QLCNIC_MAX_TX_QUEUES 1 #define RSS_HASHTYPE_IP_TCP 0x3 @@ -177,6 +178,10 @@ static struct qlcnic_hardware_ops qlcnic_83xx_hw_ops = { .get_board_info = qlcnic_83xx_get_port_info, .set_mac_filter_count = qlcnic_83xx_set_mac_filter_count, .free_mac_list = qlcnic_82xx_free_mac_list, + .io_error_detected = qlcnic_83xx_io_error_detected, + .io_slot_reset = qlcnic_83xx_io_slot_reset, + .io_resume = qlcnic_83xx_io_resume, + }; static struct qlcnic_nic_template qlcnic_83xx_ops = { @@ -723,9 +728,8 @@ void qlcnic_dump_mbx(struct qlcnic_adapter *adapter, pr_info("\n"); } -static inline void -qlcnic_83xx_poll_for_mbx_completion(struct qlcnic_adapter *adapter, - struct qlcnic_cmd_args *cmd) +static void qlcnic_83xx_poll_for_mbx_completion(struct qlcnic_adapter *adapter, + struct qlcnic_cmd_args *cmd) { struct qlcnic_hardware_context *ahw = adapter->ahw; int opcode = LSW(cmd->req.arg[0]); @@ -2327,7 +2331,7 @@ int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *adapter, pci_info->tx_max_bw, pci_info->mac); } if (ahw->op_mode == QLCNIC_MGMT_FUNC) - dev_info(dev, "Max vNIC functions = %d, active vNIC functions = %d\n", + dev_info(dev, "Max functions = %d, active functions = %d\n", ahw->max_pci_func, ahw->act_pci_func); } else { @@ -3544,7 +3548,7 @@ qlcnic_83xx_notify_cmd_completion(struct qlcnic_adapter *adapter, complete(&cmd->completion); } -static inline void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) +static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) { struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; struct list_head *head = &mbx->cmd_q; @@ -3564,7 +3568,7 @@ static inline void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) spin_unlock(&mbx->queue_lock); } -static inline int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter) +static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter) { struct qlcnic_hardware_context *ahw = adapter->ahw; struct qlcnic_mailbox *mbx = ahw->mailbox; @@ -3592,8 +3596,8 @@ static inline void qlcnic_83xx_signal_mbx_cmd(struct qlcnic_adapter *adapter, QLCWRX(adapter->ahw, QLCNIC_FW_MBX_CTRL, QLCNIC_CLR_OWNER); } -static inline void qlcnic_83xx_dequeue_mbx_cmd(struct qlcnic_adapter *adapter, - struct qlcnic_cmd_args *cmd) +static void qlcnic_83xx_dequeue_mbx_cmd(struct qlcnic_adapter *adapter, + struct qlcnic_cmd_args *cmd) { struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; @@ -3653,9 +3657,9 @@ void qlcnic_83xx_detach_mailbox_work(struct qlcnic_adapter *adapter) qlcnic_83xx_flush_mbx_queue(adapter); } -static inline int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, - struct qlcnic_cmd_args *cmd, - unsigned long *timeout) +static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, + struct qlcnic_cmd_args *cmd, + unsigned long *timeout) { struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; @@ -3680,8 +3684,8 @@ static inline int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, return -EBUSY; } -static inline int qlcnic_83xx_check_mac_rcode(struct qlcnic_adapter *adapter, - struct qlcnic_cmd_args *cmd) +static int qlcnic_83xx_check_mac_rcode(struct qlcnic_adapter *adapter, + struct qlcnic_cmd_args *cmd) { u8 mac_cmd_rcode; u32 fw_data; @@ -3820,3 +3824,57 @@ int qlcnic_83xx_init_mailbox_work(struct qlcnic_adapter *adapter) set_bit(QLC_83XX_MBX_READY, &mbx->status); return 0; } + +pci_ers_result_t qlcnic_83xx_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct qlcnic_adapter *adapter = pci_get_drvdata(pdev); + + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + if (state == pci_channel_io_normal) + return PCI_ERS_RESULT_RECOVERED; + + set_bit(__QLCNIC_AER, &adapter->state); + set_bit(__QLCNIC_RESETTING, &adapter->state); + + qlcnic_83xx_aer_stop_poll_work(adapter); + + pci_save_state(pdev); + pci_disable_device(pdev); + + return PCI_ERS_RESULT_NEED_RESET; +} + +pci_ers_result_t qlcnic_83xx_io_slot_reset(struct pci_dev *pdev) +{ + struct qlcnic_adapter *adapter = pci_get_drvdata(pdev); + int err = 0; + + pdev->error_state = pci_channel_io_normal; + err = pci_enable_device(pdev); + if (err) + goto disconnect; + + pci_set_power_state(pdev, PCI_D0); + pci_set_master(pdev); + pci_restore_state(pdev); + + err = qlcnic_83xx_aer_reset(adapter); + if (err == 0) + return PCI_ERS_RESULT_RECOVERED; +disconnect: + clear_bit(__QLCNIC_AER, &adapter->state); + clear_bit(__QLCNIC_RESETTING, &adapter->state); + return PCI_ERS_RESULT_DISCONNECT; +} + +void qlcnic_83xx_io_resume(struct pci_dev *pdev) +{ + struct qlcnic_adapter *adapter = pci_get_drvdata(pdev); + + pci_cleanup_aer_uncorrect_error_status(pdev); + if (test_and_clear_bit(__QLCNIC_AER, &adapter->state)) + qlcnic_83xx_aer_start_poll_work(adapter); +} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index 0fc56160d584..533e150503af 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -274,11 +274,7 @@ struct qlcnic_macvlan_mbx { struct qlc_83xx_fw_info { const struct firmware *fw; - u16 major_fw_version; - u8 minor_fw_version; - u8 sub_fw_version; - u8 fw_build_num; - u8 load_from_file; + char fw_file_name[QLC_FW_FILE_NAME_LEN]; }; struct qlc_83xx_reset { @@ -297,6 +293,7 @@ struct qlc_83xx_reset { #define QLC_83XX_IDC_DISABLE_FW_RESET_RECOVERY 0x1 #define QLC_83XX_IDC_GRACEFULL_RESET 0x2 +#define QLC_83XX_IDC_DISABLE_FW_DUMP 0x4 #define QLC_83XX_IDC_TIMESTAMP 0 #define QLC_83XX_IDC_DURATION 1 #define QLC_83XX_IDC_INIT_TIMEOUT_SECS 30 @@ -414,6 +411,7 @@ enum qlcnic_83xx_states { #define QLC_83XX_GET_HW_LRO_CAPABILITY(val) (val & 0x400) #define QLC_83XX_GET_VLAN_ALIGN_CAPABILITY(val) (val & 0x4000) #define QLC_83XX_GET_FW_LRO_MSS_CAPABILITY(val) (val & 0x20000) +#define QLC_83XX_ESWITCH_CAPABILITY BIT_23 #define QLC_83XX_VIRTUAL_NIC_MODE 0xFF #define QLC_83XX_DEFAULT_MODE 0x0 #define QLC_83XX_SRIOV_MODE 0x1 @@ -628,6 +626,7 @@ int qlcnic_83xx_config_vnic_opmode(struct qlcnic_adapter *); int qlcnic_83xx_get_vnic_vport_info(struct qlcnic_adapter *, struct qlcnic_info *, u8); int qlcnic_83xx_get_vnic_pf_info(struct qlcnic_adapter *, struct qlcnic_info *); +int qlcnic_83xx_enable_port_eswitch(struct qlcnic_adapter *, int); void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *); void qlcnic_83xx_get_stats(struct qlcnic_adapter *adapter, u64 *data); @@ -656,4 +655,11 @@ int qlcnic_83xx_idc_init(struct qlcnic_adapter *); int qlcnic_83xx_idc_reattach_driver(struct qlcnic_adapter *); int qlcnic_83xx_set_vnic_opmode(struct qlcnic_adapter *); int qlcnic_83xx_check_vnic_state(struct qlcnic_adapter *); +void qlcnic_83xx_aer_stop_poll_work(struct qlcnic_adapter *); +int qlcnic_83xx_aer_reset(struct qlcnic_adapter *); +void qlcnic_83xx_aer_start_poll_work(struct qlcnic_adapter *); +pci_ers_result_t qlcnic_83xx_io_error_detected(struct pci_dev *, + pci_channel_state_t); +pci_ers_result_t qlcnic_83xx_io_slot_reset(struct pci_dev *); +void qlcnic_83xx_io_resume(struct pci_dev *); #endif diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index a969ac27633c..f09e787af0b2 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -797,7 +797,6 @@ static int qlcnic_83xx_idc_init_state(struct qlcnic_adapter *adapter) ret = qlcnic_83xx_idc_restart_hw(adapter, 1); } else { ret = qlcnic_83xx_idc_check_timeout(adapter, timeout); - return ret; } return ret; @@ -1268,31 +1267,33 @@ static int qlcnic_83xx_copy_bootloader(struct qlcnic_adapter *adapter) static int qlcnic_83xx_copy_fw_file(struct qlcnic_adapter *adapter) { + struct qlc_83xx_fw_info *fw_info = adapter->ahw->fw_info; + const struct firmware *fw = fw_info->fw; u32 dest, *p_cache; - u64 addr; + int i, ret = -EIO; u8 data[16]; size_t size; - int i, ret = -EIO; + u64 addr; dest = QLCRDX(adapter->ahw, QLCNIC_FW_IMAGE_ADDR); - size = (adapter->ahw->fw_info.fw->size & ~0xF); - p_cache = (u32 *)adapter->ahw->fw_info.fw->data; + size = (fw->size & ~0xF); + p_cache = (u32 *)fw->data; addr = (u64)dest; ret = qlcnic_83xx_ms_mem_write128(adapter, addr, (u32 *)p_cache, size / 16); if (ret) { dev_err(&adapter->pdev->dev, "MS memory write failed\n"); - release_firmware(adapter->ahw->fw_info.fw); - adapter->ahw->fw_info.fw = NULL; + release_firmware(fw); + fw_info->fw = NULL; return -EIO; } /* alignment check */ - if (adapter->ahw->fw_info.fw->size & 0xF) { + if (fw->size & 0xF) { addr = dest + size; - for (i = 0; i < (adapter->ahw->fw_info.fw->size & 0xF); i++) - data[i] = adapter->ahw->fw_info.fw->data[size + i]; + for (i = 0; i < (fw->size & 0xF); i++) + data[i] = fw->data[size + i]; for (; i < 16; i++) data[i] = 0; ret = qlcnic_83xx_ms_mem_write128(adapter, addr, @@ -1300,13 +1301,13 @@ static int qlcnic_83xx_copy_fw_file(struct qlcnic_adapter *adapter) if (ret) { dev_err(&adapter->pdev->dev, "MS memory write failed\n"); - release_firmware(adapter->ahw->fw_info.fw); - adapter->ahw->fw_info.fw = NULL; + release_firmware(fw); + fw_info->fw = NULL; return -EIO; } } - release_firmware(adapter->ahw->fw_info.fw); - adapter->ahw->fw_info.fw = NULL; + release_firmware(fw); + fw_info->fw = NULL; return 0; } @@ -1950,35 +1951,12 @@ static void qlcnic_83xx_init_hw(struct qlcnic_adapter *p_dev) dev_err(&p_dev->pdev->dev, "%s: failed\n", __func__); } -static inline void qlcnic_83xx_get_fw_file_name(struct qlcnic_adapter *adapter, - char *file_name) -{ - struct pci_dev *pdev = adapter->pdev; - - memset(file_name, 0, QLC_FW_FILE_NAME_LEN); - - switch (pdev->device) { - case PCI_DEVICE_ID_QLOGIC_QLE834X: - strncpy(file_name, QLC_83XX_FW_FILE_NAME, - QLC_FW_FILE_NAME_LEN); - break; - case PCI_DEVICE_ID_QLOGIC_QLE844X: - strncpy(file_name, QLC_84XX_FW_FILE_NAME, - QLC_FW_FILE_NAME_LEN); - break; - default: - dev_err(&pdev->dev, "%s: Invalid device id\n", - __func__); - } -} - static int qlcnic_83xx_load_fw_image_from_host(struct qlcnic_adapter *adapter) { - char fw_file_name[QLC_FW_FILE_NAME_LEN]; + struct qlc_83xx_fw_info *fw_info = adapter->ahw->fw_info; int err = -EIO; - qlcnic_83xx_get_fw_file_name(adapter, fw_file_name); - if (request_firmware(&adapter->ahw->fw_info.fw, fw_file_name, + if (request_firmware(&fw_info->fw, fw_info->fw_file_name, &(adapter->pdev->dev))) { dev_err(&adapter->pdev->dev, "No file FW image, loading flash FW image.\n"); @@ -2025,36 +2003,6 @@ static int qlcnic_83xx_restart_hw(struct qlcnic_adapter *adapter) return 0; } -/** -* qlcnic_83xx_config_default_opmode -* -* @adapter: adapter structure -* -* Configure default driver operating mode -* -* Returns: Error code or Success(0) -* */ -int qlcnic_83xx_config_default_opmode(struct qlcnic_adapter *adapter) -{ - u32 op_mode; - struct qlcnic_hardware_context *ahw = adapter->ahw; - - qlcnic_get_func_no(adapter); - op_mode = QLCRDX(ahw, QLC_83XX_DRV_OP_MODE); - - if (test_bit(__QLCNIC_SRIOV_CAPABLE, &adapter->state)) - op_mode = QLC_83XX_DEFAULT_OPMODE; - - if (op_mode == QLC_83XX_DEFAULT_OPMODE) { - adapter->nic_ops->init_driver = qlcnic_83xx_init_default_driver; - ahw->idc.state_entry = qlcnic_83xx_idc_ready_state_entry; - } else { - return -EIO; - } - - return 0; -} - int qlcnic_83xx_get_nic_configuration(struct qlcnic_adapter *adapter) { int err; @@ -2074,26 +2022,26 @@ int qlcnic_83xx_get_nic_configuration(struct qlcnic_adapter *adapter) ahw->max_mac_filters = nic_info.max_mac_filters; ahw->max_mtu = nic_info.max_mtu; - /* VNIC mode is detected by BIT_23 in capabilities. This bit is also - * set in case device is SRIOV capable. VNIC and SRIOV are mutually - * exclusive. So in case of sriov capable device load driver in - * default mode + /* eSwitch capability indicates vNIC mode. + * vNIC and SRIOV are mutually exclusive operational modes. + * If SR-IOV capability is detected, SR-IOV physical function + * will get initialized in default mode. + * SR-IOV virtual function initialization follows a + * different code path and opmode. + * SRIOV mode has precedence over vNIC mode. */ - if (test_bit(__QLCNIC_SRIOV_CAPABLE, &adapter->state)) { - ahw->nic_mode = QLC_83XX_DEFAULT_MODE; - return ahw->nic_mode; - } + if (test_bit(__QLCNIC_SRIOV_CAPABLE, &adapter->state)) + return QLC_83XX_DEFAULT_OPMODE; - if (ahw->capabilities & BIT_23) - ahw->nic_mode = QLC_83XX_VIRTUAL_NIC_MODE; - else - ahw->nic_mode = QLC_83XX_DEFAULT_MODE; + if (ahw->capabilities & QLC_83XX_ESWITCH_CAPABILITY) + return QLC_83XX_VIRTUAL_NIC_MODE; - return ahw->nic_mode; + return QLC_83XX_DEFAULT_OPMODE; } int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter) { + struct qlcnic_hardware_context *ahw = adapter->ahw; int ret; ret = qlcnic_83xx_get_nic_configuration(adapter); @@ -2101,11 +2049,16 @@ int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter) return -EIO; if (ret == QLC_83XX_VIRTUAL_NIC_MODE) { + ahw->nic_mode = QLC_83XX_VIRTUAL_NIC_MODE; if (qlcnic_83xx_config_vnic_opmode(adapter)) return -EIO; - } else if (ret == QLC_83XX_DEFAULT_MODE) { - if (qlcnic_83xx_config_default_opmode(adapter)) - return -EIO; + + } else if (ret == QLC_83XX_DEFAULT_OPMODE) { + ahw->nic_mode = QLC_83XX_DEFAULT_MODE; + adapter->nic_ops->init_driver = qlcnic_83xx_init_default_driver; + ahw->idc.state_entry = qlcnic_83xx_idc_ready_state_entry; + } else { + return -EIO; } return 0; @@ -2174,6 +2127,39 @@ static void qlcnic_83xx_clear_function_resources(struct qlcnic_adapter *adapter) } } +static int qlcnic_83xx_get_fw_info(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct pci_dev *pdev = adapter->pdev; + struct qlc_83xx_fw_info *fw_info; + int err = 0; + + ahw->fw_info = kzalloc(sizeof(*fw_info), GFP_KERNEL); + if (!ahw->fw_info) { + err = -ENOMEM; + } else { + fw_info = ahw->fw_info; + switch (pdev->device) { + case PCI_DEVICE_ID_QLOGIC_QLE834X: + strncpy(fw_info->fw_file_name, QLC_83XX_FW_FILE_NAME, + QLC_FW_FILE_NAME_LEN); + break; + case PCI_DEVICE_ID_QLOGIC_QLE844X: + strncpy(fw_info->fw_file_name, QLC_84XX_FW_FILE_NAME, + QLC_FW_FILE_NAME_LEN); + break; + default: + dev_err(&pdev->dev, "%s: Invalid device id\n", + __func__); + err = -EINVAL; + break; + } + } + + return err; +} + + int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) { struct qlcnic_hardware_context *ahw = adapter->ahw; @@ -2199,10 +2185,14 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) if (!qlcnic_83xx_read_flash_descriptor_table(adapter)) qlcnic_83xx_read_flash_mfg_id(adapter); - err = qlcnic_83xx_idc_init(adapter); + err = qlcnic_83xx_get_fw_info(adapter); if (err) goto detach_mbx; + err = qlcnic_83xx_idc_init(adapter); + if (err) + goto clear_fw_info; + err = qlcnic_setup_intr(adapter, 0, 0); if (err) { dev_err(&adapter->pdev->dev, "Failed to setup interrupt\n"); @@ -2243,9 +2233,67 @@ disable_mbx_intr: disable_intr: qlcnic_teardown_intr(adapter); +clear_fw_info: + kfree(ahw->fw_info); + detach_mbx: qlcnic_83xx_detach_mailbox_work(adapter); qlcnic_83xx_free_mailbox(ahw->mailbox); exit: return err; } + +void qlcnic_83xx_aer_stop_poll_work(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct qlc_83xx_idc *idc = &ahw->idc; + + clear_bit(QLC_83XX_MBX_READY, &idc->status); + cancel_delayed_work_sync(&adapter->fw_work); + + if (ahw->nic_mode == QLC_83XX_VIRTUAL_NIC_MODE) + qlcnic_83xx_disable_vnic_mode(adapter, 1); + + qlcnic_83xx_idc_detach_driver(adapter); + qlcnic_83xx_register_nic_idc_func(adapter, 0); + + cancel_delayed_work_sync(&adapter->idc_aen_work); +} + +int qlcnic_83xx_aer_reset(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct qlc_83xx_idc *idc = &ahw->idc; + int ret = 0; + u32 owner; + + /* Mark the previous IDC state as NEED_RESET so + * that state_entry() will perform the reattachment + * and bringup the device + */ + idc->prev_state = QLC_83XX_IDC_DEV_NEED_RESET; + owner = qlcnic_83xx_idc_find_reset_owner_id(adapter); + if (ahw->pci_func == owner) { + ret = qlcnic_83xx_restart_hw(adapter); + if (ret < 0) + return ret; + qlcnic_83xx_idc_clear_registers(adapter, 0); + } + + ret = idc->state_entry(adapter); + return ret; +} + +void qlcnic_83xx_aer_start_poll_work(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct qlc_83xx_idc *idc = &ahw->idc; + u32 owner; + + idc->prev_state = QLC_83XX_IDC_DEV_READY; + owner = qlcnic_83xx_idc_find_reset_owner_id(adapter); + if (ahw->pci_func == owner) + qlcnic_83xx_idc_enter_ready_state(adapter, 0); + + qlcnic_schedule_work(adapter, qlcnic_83xx_idc_poll_dev_state, 0); +} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c index 599d1fda52f2..0248a4c2f5dd 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c @@ -208,7 +208,7 @@ int qlcnic_83xx_config_vnic_opmode(struct qlcnic_adapter *adapter) return -EIO; } - if (ahw->capabilities & BIT_23) + if (ahw->capabilities & QLC_83XX_ESWITCH_CAPABILITY) adapter->flags |= QLCNIC_ESWITCH_ENABLED; else adapter->flags &= ~QLCNIC_ESWITCH_ENABLED; @@ -239,3 +239,41 @@ int qlcnic_83xx_check_vnic_state(struct qlcnic_adapter *adapter) return 0; } + +static int qlcnic_83xx_get_eswitch_port_info(struct qlcnic_adapter *adapter, + int func, int *port_id) +{ + struct qlcnic_info nic_info; + int err = 0; + + memset(&nic_info, 0, sizeof(struct qlcnic_info)); + + err = qlcnic_get_nic_info(adapter, &nic_info, func); + if (err) + return err; + + if (nic_info.capabilities & QLC_83XX_ESWITCH_CAPABILITY) + *port_id = nic_info.phys_port; + else + err = -EIO; + + return err; +} + +int qlcnic_83xx_enable_port_eswitch(struct qlcnic_adapter *adapter, int func) +{ + int id, err = 0; + + err = qlcnic_83xx_get_eswitch_port_info(adapter, func, &id); + if (err) + return err; + + if (!(adapter->eswitch[id].flags & QLCNIC_SWITCH_ENABLE)) { + if (!qlcnic_enable_eswitch(adapter, id, 1)) + adapter->eswitch[id].flags |= QLCNIC_SWITCH_ENABLE; + else + err = -EIO; + } + + return err; +} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index bf3b17eaf8b8..86850dd633a1 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -448,14 +448,14 @@ int qlcnic_82xx_fw_cmd_create_tx_ctx(struct qlcnic_adapter *adapter, *(tx_ring->hw_consumer) = 0; rq_size = SIZEOF_HOSTRQ_TX(struct qlcnic_hostrq_tx_ctx); - rq_addr = dma_alloc_coherent(&adapter->pdev->dev, rq_size, - &rq_phys_addr, GFP_KERNEL | __GFP_ZERO); + rq_addr = dma_zalloc_coherent(&adapter->pdev->dev, rq_size, + &rq_phys_addr, GFP_KERNEL); if (!rq_addr) return -ENOMEM; rsp_size = SIZEOF_CARDRSP_TX(struct qlcnic_cardrsp_tx_ctx); - rsp_addr = dma_alloc_coherent(&adapter->pdev->dev, rsp_size, - &rsp_phys_addr, GFP_KERNEL | __GFP_ZERO); + rsp_addr = dma_zalloc_coherent(&adapter->pdev->dev, rsp_size, + &rsp_phys_addr, GFP_KERNEL); if (!rsp_addr) { err = -ENOMEM; goto out_free_rq; @@ -865,8 +865,8 @@ int qlcnic_82xx_get_nic_info(struct qlcnic_adapter *adapter, struct qlcnic_cmd_args cmd; size_t nic_size = sizeof(struct qlcnic_info_le); - nic_info_addr = dma_alloc_coherent(&adapter->pdev->dev, nic_size, - &nic_dma_t, GFP_KERNEL | __GFP_ZERO); + nic_info_addr = dma_zalloc_coherent(&adapter->pdev->dev, nic_size, + &nic_dma_t, GFP_KERNEL); if (!nic_info_addr) return -ENOMEM; @@ -919,8 +919,8 @@ int qlcnic_82xx_set_nic_info(struct qlcnic_adapter *adapter, if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC) return err; - nic_info_addr = dma_alloc_coherent(&adapter->pdev->dev, nic_size, - &nic_dma_t, GFP_KERNEL | __GFP_ZERO); + nic_info_addr = dma_zalloc_coherent(&adapter->pdev->dev, nic_size, + &nic_dma_t, GFP_KERNEL); if (!nic_info_addr) return -ENOMEM; @@ -972,9 +972,8 @@ int qlcnic_82xx_get_pci_info(struct qlcnic_adapter *adapter, size_t npar_size = sizeof(struct qlcnic_pci_info_le); size_t pci_size = npar_size * QLCNIC_MAX_PCI_FUNC; - pci_info_addr = dma_alloc_coherent(&adapter->pdev->dev, pci_size, - &pci_info_dma_t, - GFP_KERNEL | __GFP_ZERO); + pci_info_addr = dma_zalloc_coherent(&adapter->pdev->dev, pci_size, + &pci_info_dma_t, GFP_KERNEL); if (!pci_info_addr) return -ENOMEM; @@ -1074,8 +1073,8 @@ int qlcnic_get_port_stats(struct qlcnic_adapter *adapter, const u8 func, return -EIO; } - stats_addr = dma_alloc_coherent(&adapter->pdev->dev, stats_size, - &stats_dma_t, GFP_KERNEL | __GFP_ZERO); + stats_addr = dma_zalloc_coherent(&adapter->pdev->dev, stats_size, + &stats_dma_t, GFP_KERNEL); if (!stats_addr) return -ENOMEM; @@ -1130,8 +1129,8 @@ int qlcnic_get_mac_stats(struct qlcnic_adapter *adapter, if (mac_stats == NULL) return -ENOMEM; - stats_addr = dma_alloc_coherent(&adapter->pdev->dev, stats_size, - &stats_dma_t, GFP_KERNEL | __GFP_ZERO); + stats_addr = dma_zalloc_coherent(&adapter->pdev->dev, stats_size, + &stats_dma_t, GFP_KERNEL); if (!stats_addr) return -ENOMEM; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 7b0c90efb365..332aa71798f6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1509,6 +1509,68 @@ static void qlcnic_set_msglevel(struct net_device *netdev, u32 msglvl) adapter->ahw->msg_enable = msglvl; } +int qlcnic_enable_fw_dump_state(struct qlcnic_adapter *adapter) +{ + struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump; + u32 val; + + if (qlcnic_84xx_check(adapter)) { + if (qlcnic_83xx_lock_driver(adapter)) + return -EBUSY; + + val = QLCRDX(adapter->ahw, QLC_83XX_IDC_CTRL); + val &= ~QLC_83XX_IDC_DISABLE_FW_DUMP; + QLCWRX(adapter->ahw, QLC_83XX_IDC_CTRL, val); + + qlcnic_83xx_unlock_driver(adapter); + } else { + fw_dump->enable = true; + } + + dev_info(&adapter->pdev->dev, "FW dump enabled\n"); + + return 0; +} + +static int qlcnic_disable_fw_dump_state(struct qlcnic_adapter *adapter) +{ + struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump; + u32 val; + + if (qlcnic_84xx_check(adapter)) { + if (qlcnic_83xx_lock_driver(adapter)) + return -EBUSY; + + val = QLCRDX(adapter->ahw, QLC_83XX_IDC_CTRL); + val |= QLC_83XX_IDC_DISABLE_FW_DUMP; + QLCWRX(adapter->ahw, QLC_83XX_IDC_CTRL, val); + + qlcnic_83xx_unlock_driver(adapter); + } else { + fw_dump->enable = false; + } + + dev_info(&adapter->pdev->dev, "FW dump disabled\n"); + + return 0; +} + +bool qlcnic_check_fw_dump_state(struct qlcnic_adapter *adapter) +{ + struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump; + bool state; + u32 val; + + if (qlcnic_84xx_check(adapter)) { + val = QLCRDX(adapter->ahw, QLC_83XX_IDC_CTRL); + state = (val & QLC_83XX_IDC_DISABLE_FW_DUMP) ? false : true; + } else { + state = fw_dump->enable; + } + + return state; +} + static int qlcnic_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump) { @@ -1525,7 +1587,7 @@ qlcnic_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump) else dump->len = 0; - if (!fw_dump->enable) + if (!qlcnic_check_fw_dump_state(adapter)) dump->flag = ETH_FW_DUMP_DISABLE; else dump->flag = fw_dump->tmpl_hdr->drv_cap_mask; @@ -1573,77 +1635,111 @@ qlcnic_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump, return 0; } +static int qlcnic_set_dump_mask(struct qlcnic_adapter *adapter, u32 mask) +{ + struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump; + struct net_device *netdev = adapter->netdev; + + if (!qlcnic_check_fw_dump_state(adapter)) { + netdev_info(netdev, + "Can not change driver mask to 0x%x. FW dump not enabled\n", + mask); + return -EOPNOTSUPP; + } + + fw_dump->tmpl_hdr->drv_cap_mask = mask; + netdev_info(netdev, "Driver mask changed to: 0x%x\n", mask); + return 0; +} + static int qlcnic_set_dump(struct net_device *netdev, struct ethtool_dump *val) { - int i; struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump; + bool valid_mask = false; + int i, ret = 0; u32 state; switch (val->flag) { case QLCNIC_FORCE_FW_DUMP_KEY: if (!fw_dump->tmpl_hdr) { netdev_err(netdev, "FW dump not supported\n"); - return -ENOTSUPP; + ret = -EOPNOTSUPP; + break; } - if (!fw_dump->enable) { + + if (!qlcnic_check_fw_dump_state(adapter)) { netdev_info(netdev, "FW dump not enabled\n"); - return 0; + ret = -EOPNOTSUPP; + break; } + if (fw_dump->clr) { netdev_info(netdev, - "Previous dump not cleared, not forcing dump\n"); - return 0; + "Previous dump not cleared, not forcing dump\n"); + break; } + netdev_info(netdev, "Forcing a FW dump\n"); qlcnic_dev_request_reset(adapter, val->flag); break; case QLCNIC_DISABLE_FW_DUMP: - if (fw_dump->enable && fw_dump->tmpl_hdr) { - netdev_info(netdev, "Disabling FW dump\n"); - fw_dump->enable = 0; + if (!fw_dump->tmpl_hdr) { + netdev_err(netdev, "FW dump not supported\n"); + ret = -EOPNOTSUPP; + break; } - return 0; + + ret = qlcnic_disable_fw_dump_state(adapter); + break; + case QLCNIC_ENABLE_FW_DUMP: if (!fw_dump->tmpl_hdr) { netdev_err(netdev, "FW dump not supported\n"); - return -ENOTSUPP; - } - if (!fw_dump->enable) { - netdev_info(netdev, "Enabling FW dump\n"); - fw_dump->enable = 1; + ret = -EOPNOTSUPP; + break; } - return 0; + + ret = qlcnic_enable_fw_dump_state(adapter); + break; + case QLCNIC_FORCE_FW_RESET: netdev_info(netdev, "Forcing a FW reset\n"); qlcnic_dev_request_reset(adapter, val->flag); adapter->flags &= ~QLCNIC_FW_RESET_OWNER; - return 0; + break; +; case QLCNIC_SET_QUIESCENT: case QLCNIC_RESET_QUIESCENT: state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); if (state == QLCNIC_DEV_FAILED || (state == QLCNIC_DEV_BADBAD)) netdev_info(netdev, "Device in FAILED state\n"); - return 0; + break; + default: if (!fw_dump->tmpl_hdr) { netdev_err(netdev, "FW dump not supported\n"); - return -ENOTSUPP; + ret = -EOPNOTSUPP; + break; } + for (i = 0; i < ARRAY_SIZE(qlcnic_fw_dump_level); i++) { if (val->flag == qlcnic_fw_dump_level[i]) { - fw_dump->tmpl_hdr->drv_cap_mask = - val->flag; - netdev_info(netdev, "Driver mask changed to: 0x%x\n", - fw_dump->tmpl_hdr->drv_cap_mask); - return 0; + valid_mask = true; + break; } } - netdev_info(netdev, "Invalid dump level: 0x%x\n", val->flag); - return -EINVAL; + + if (valid_mask) { + ret = qlcnic_set_dump_mask(adapter, val->flag); + } else { + netdev_info(netdev, "Invalid dump level: 0x%x\n", + val->flag); + ret = -EINVAL; + } } - return 0; + return ret; } const struct ethtool_ops qlcnic_ethtool_ops = { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 7dde3ba6d9a4..c4c5023e1fdf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -540,6 +540,9 @@ static struct qlcnic_hardware_ops qlcnic_hw_ops = { .set_mac_filter_count = qlcnic_82xx_set_mac_filter_count, .free_mac_list = qlcnic_82xx_free_mac_list, .read_phys_port_id = qlcnic_82xx_read_phys_port_id, + .io_error_detected = qlcnic_82xx_io_error_detected, + .io_slot_reset = qlcnic_82xx_io_slot_reset, + .io_resume = qlcnic_82xx_io_resume, }; static void qlcnic_get_multiq_capability(struct qlcnic_adapter *adapter) @@ -793,6 +796,23 @@ static int qlcnic_get_act_pci_func(struct qlcnic_adapter *adapter) return ret; } +static bool qlcnic_port_eswitch_cfg_capability(struct qlcnic_adapter *adapter) +{ + bool ret = false; + + if (qlcnic_84xx_check(adapter)) { + ret = true; + } else if (qlcnic_83xx_check(adapter)) { + if (adapter->ahw->extra_capability[0] & + QLCNIC_FW_CAPABILITY_2_PER_PORT_ESWITCH_CFG) + ret = true; + else + ret = false; + } + + return ret; +} + int qlcnic_init_pci_info(struct qlcnic_adapter *adapter) { struct qlcnic_pci_info *pci_info; @@ -836,18 +856,30 @@ int qlcnic_init_pci_info(struct qlcnic_adapter *adapter) (pci_info[i].type != QLCNIC_TYPE_NIC)) continue; + if (qlcnic_port_eswitch_cfg_capability(adapter)) { + if (!qlcnic_83xx_enable_port_eswitch(adapter, pfn)) + adapter->npars[j].eswitch_status = true; + else + continue; + } else { + adapter->npars[j].eswitch_status = true; + } + adapter->npars[j].pci_func = pfn; adapter->npars[j].active = (u8)pci_info[i].active; adapter->npars[j].type = (u8)pci_info[i].type; adapter->npars[j].phy_port = (u8)pci_info[i].default_port; adapter->npars[j].min_bw = pci_info[i].tx_min_bw; adapter->npars[j].max_bw = pci_info[i].tx_max_bw; + j++; } - for (i = 0; i < QLCNIC_NIU_MAX_XG_PORTS; i++) { - adapter->eswitch[i].flags |= QLCNIC_SWITCH_ENABLE; - if (qlcnic_83xx_check(adapter)) + if (qlcnic_82xx_check(adapter)) { + for (i = 0; i < QLCNIC_NIU_MAX_XG_PORTS; i++) + adapter->eswitch[i].flags |= QLCNIC_SWITCH_ENABLE; + } else if (!qlcnic_port_eswitch_cfg_capability(adapter)) { + for (i = 0; i < QLCNIC_NIU_MAX_XG_PORTS; i++) qlcnic_enable_eswitch(adapter, i, 1); } @@ -969,8 +1001,8 @@ static int qlcnic_setup_pci_map(struct pci_dev *pdev, return 0; } -static inline bool qlcnic_validate_subsystem_id(struct qlcnic_adapter *adapter, - int index) +static bool qlcnic_validate_subsystem_id(struct qlcnic_adapter *adapter, + int index) { struct pci_dev *pdev = adapter->pdev; unsigned short subsystem_vendor; @@ -1272,6 +1304,9 @@ int qlcnic_set_default_offload_settings(struct qlcnic_adapter *adapter) return 0; for (i = 0; i < adapter->ahw->act_pci_func; i++) { + if (!adapter->npars[i].eswitch_status) + continue; + memset(&esw_cfg, 0, sizeof(struct qlcnic_esw_func_cfg)); esw_cfg.pci_func = adapter->npars[i].pci_func; esw_cfg.mac_override = BIT_0; @@ -1334,6 +1369,9 @@ int qlcnic_reset_npar_config(struct qlcnic_adapter *adapter) for (i = 0; i < adapter->ahw->act_pci_func; i++) { npar = &adapter->npars[i]; pci_func = npar->pci_func; + if (!adapter->npars[i].eswitch_status) + continue; + memset(&nic_info, 0, sizeof(struct qlcnic_info)); err = qlcnic_get_nic_info(adapter, &nic_info, pci_func); if (err) @@ -2399,6 +2437,7 @@ static void qlcnic_remove(struct pci_dev *pdev) qlcnic_83xx_free_mbx_intr(adapter); qlcnic_83xx_detach_mailbox_work(adapter); qlcnic_83xx_free_mailbox(ahw->mailbox); + kfree(ahw->fw_info); } qlcnic_detach(adapter); @@ -3045,7 +3084,7 @@ skip_ack_check: qlcnic_api_unlock(adapter); rtnl_lock(); - if (adapter->ahw->fw_dump.enable && + if (qlcnic_check_fw_dump_state(adapter) && (adapter->flags & QLCNIC_FW_RESET_OWNER)) { QLCDB(adapter, DRV, "Take FW dump\n"); qlcnic_dump_fw(adapter); @@ -3431,19 +3470,6 @@ static int qlcnic_attach_func(struct pci_dev *pdev) return err; } - if (qlcnic_83xx_check(adapter)) { - /* register for NIC IDC AEN Events */ - qlcnic_83xx_register_nic_idc_func(adapter, 1); - err = qlcnic_83xx_setup_mbx_intr(adapter); - if (err) { - dev_err(&adapter->pdev->dev, - "failed to setup mbx interrupt\n"); - qlcnic_clr_all_drv_state(adapter, 1); - clear_bit(__QLCNIC_AER, &adapter->state); - goto done; - } - } - if (netif_running(netdev)) { err = qlcnic_attach(adapter); if (err) { @@ -3464,8 +3490,8 @@ static int qlcnic_attach_func(struct pci_dev *pdev) return err; } -static pci_ers_result_t qlcnic_io_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) +pci_ers_result_t qlcnic_82xx_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) { struct qlcnic_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; @@ -3484,12 +3510,6 @@ static pci_ers_result_t qlcnic_io_error_detected(struct pci_dev *pdev, if (netif_running(netdev)) qlcnic_down(adapter, netdev); - if (qlcnic_83xx_check(adapter)) { - qlcnic_83xx_free_mbx_intr(adapter); - qlcnic_83xx_register_nic_idc_func(adapter, 0); - cancel_delayed_work_sync(&adapter->idc_aen_work); - } - qlcnic_detach(adapter); qlcnic_teardown_intr(adapter); @@ -3501,13 +3521,13 @@ static pci_ers_result_t qlcnic_io_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } -static pci_ers_result_t qlcnic_io_slot_reset(struct pci_dev *pdev) +pci_ers_result_t qlcnic_82xx_io_slot_reset(struct pci_dev *pdev) { return qlcnic_attach_func(pdev) ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } -static void qlcnic_io_resume(struct pci_dev *pdev) +void qlcnic_82xx_io_resume(struct pci_dev *pdev) { u32 state; struct qlcnic_adapter *adapter = pci_get_drvdata(pdev); @@ -3517,9 +3537,48 @@ static void qlcnic_io_resume(struct pci_dev *pdev) if (state == QLCNIC_DEV_READY && test_and_clear_bit(__QLCNIC_AER, &adapter->state)) qlcnic_schedule_work(adapter, qlcnic_fw_poll_work, - FW_POLL_DELAY); + FW_POLL_DELAY); +} + +static pci_ers_result_t qlcnic_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct qlcnic_adapter *adapter = pci_get_drvdata(pdev); + struct qlcnic_hardware_ops *hw_ops = adapter->ahw->hw_ops; + + if (hw_ops->io_error_detected) { + return hw_ops->io_error_detected(pdev, state); + } else { + dev_err(&pdev->dev, "AER error_detected handler not registered.\n"); + return PCI_ERS_RESULT_DISCONNECT; + } } +static pci_ers_result_t qlcnic_io_slot_reset(struct pci_dev *pdev) +{ + struct qlcnic_adapter *adapter = pci_get_drvdata(pdev); + struct qlcnic_hardware_ops *hw_ops = adapter->ahw->hw_ops; + + if (hw_ops->io_slot_reset) { + return hw_ops->io_slot_reset(pdev); + } else { + dev_err(&pdev->dev, "AER slot_reset handler not registered.\n"); + return PCI_ERS_RESULT_DISCONNECT; + } +} + +static void qlcnic_io_resume(struct pci_dev *pdev) +{ + struct qlcnic_adapter *adapter = pci_get_drvdata(pdev); + struct qlcnic_hardware_ops *hw_ops = adapter->ahw->hw_ops; + + if (hw_ops->io_resume) + hw_ops->io_resume(pdev); + else + dev_err(&pdev->dev, "AER resume handler not registered.\n"); +} + + static int qlcnicvf_start_firmware(struct qlcnic_adapter *adapter) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index 79e54efe07b9..15513608d480 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -1082,14 +1082,17 @@ flash_temp: } tmpl_hdr = ahw->fw_dump.tmpl_hdr; - tmpl_hdr->drv_cap_mask = QLCNIC_DUMP_MASK_DEF; + tmpl_hdr->drv_cap_mask = tmpl_hdr->cap_mask; + dev_info(&adapter->pdev->dev, + "Default minidump capture mask 0x%x\n", + tmpl_hdr->cap_mask); if ((tmpl_hdr->version & 0xfffff) >= 0x20001) ahw->fw_dump.use_pex_dma = true; else ahw->fw_dump.use_pex_dma = false; - ahw->fw_dump.enable = 1; + qlcnic_enable_fw_dump_state(adapter); return 0; } @@ -1112,7 +1115,11 @@ int qlcnic_dump_fw(struct qlcnic_adapter *adapter) ahw = adapter->ahw; - if (!fw_dump->enable) { + /* Return if we don't have firmware dump template header */ + if (!tmpl_hdr) + return -EIO; + + if (!qlcnic_check_fw_dump_state(adapter)) { dev_info(&adapter->pdev->dev, "Dump not enabled\n"); return -EIO; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 26f9aa66403d..652cc13c5023 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -398,14 +398,10 @@ int qlcnic_sriov_get_vf_vport_info(struct qlcnic_adapter *adapter, } static int qlcnic_sriov_set_pvid_mode(struct qlcnic_adapter *adapter, - struct qlcnic_cmd_args *cmd, u32 cap) + struct qlcnic_cmd_args *cmd) { - if (cap & QLC_83XX_PVID_STRIP_CAPABILITY) { - adapter->rx_pvid = 0; - } else { - adapter->rx_pvid = (cmd->rsp.arg[1] >> 16) & 0xffff; - adapter->flags &= ~QLCNIC_TAGGING_ENABLED; - } + adapter->rx_pvid = MSW(cmd->rsp.arg[1]) & 0xffff; + adapter->flags &= ~QLCNIC_TAGGING_ENABLED; return 0; } @@ -441,9 +437,8 @@ static int qlcnic_sriov_get_vf_acl(struct qlcnic_adapter *adapter, { struct qlcnic_sriov *sriov = adapter->ahw->sriov; struct qlcnic_cmd_args cmd; - int ret, cap; + int ret = 0; - cap = info->capabilities; ret = qlcnic_sriov_alloc_bc_mbx_args(&cmd, QLCNIC_BC_CMD_GET_ACL); if (ret) return ret; @@ -459,7 +454,7 @@ static int qlcnic_sriov_get_vf_acl(struct qlcnic_adapter *adapter, ret = qlcnic_sriov_set_guest_vlan_mode(adapter, &cmd); break; case QLC_PVID_MODE: - ret = qlcnic_sriov_set_pvid_mode(adapter, &cmd, cap); + ret = qlcnic_sriov_set_pvid_mode(adapter, &cmd); break; } } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 2d6faf0fcc10..a9bc651e9ffa 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1183,10 +1183,19 @@ static int qlcnic_sriov_pf_get_acl_cmd(struct qlcnic_bc_trans *trans, struct qlcnic_vf_info *vf = trans->vf; struct qlcnic_vport *vp = vf->vp; u8 cmd_op, mode = vp->vlan_mode; + struct qlcnic_adapter *adapter; + + adapter = vf->adapter; cmd_op = trans->req_hdr->cmd_op; cmd->rsp.arg[0] |= 1 << 25; + /* For 84xx adapter in case of PVID , PFD should send vlan mode as + * QLC_NO_VLAN_MODE to VFD which is zero in mailbox response + */ + if (qlcnic_84xx_check(adapter) && mode == QLC_PVID_MODE) + return 0; + switch (mode) { case QLC_GUEST_VLAN_MODE: cmd->rsp.arg[1] = mode | 1 << 8; @@ -1772,8 +1781,8 @@ int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, return 0; } -static inline __u32 qlcnic_sriov_get_vf_vlan(struct qlcnic_adapter *adapter, - struct qlcnic_vport *vp, int vf) +static __u32 qlcnic_sriov_get_vf_vlan(struct qlcnic_adapter *adapter, + struct qlcnic_vport *vp, int vf) { __u32 vlan = 0; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index 660c3f5b2237..c6165d05cc13 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -465,8 +465,14 @@ static ssize_t qlcnic_sysfs_read_pm_config(struct file *filp, memset(&pm_cfg, 0, sizeof(struct qlcnic_pm_func_cfg) * QLCNIC_MAX_PCI_FUNC); - for (i = 0; i < adapter->ahw->act_pci_func; i++) { + for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++) { pci_func = adapter->npars[i].pci_func; + if (!adapter->npars[i].active) + continue; + + if (!adapter->npars[i].eswitch_status) + continue; + pm_cfg[pci_func].action = adapter->npars[i].enable_pm; pm_cfg[pci_func].dest_npar = 0; pm_cfg[pci_func].pci_func = i; @@ -632,8 +638,14 @@ static ssize_t qlcnic_sysfs_read_esw_config(struct file *file, memset(&esw_cfg, 0, sizeof(struct qlcnic_esw_func_cfg) * QLCNIC_MAX_PCI_FUNC); - for (i = 0; i < adapter->ahw->act_pci_func; i++) { + for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++) { pci_func = adapter->npars[i].pci_func; + if (!adapter->npars[i].active) + continue; + + if (!adapter->npars[i].eswitch_status) + continue; + esw_cfg[pci_func].pci_func = pci_func; if (qlcnic_get_eswitch_port_config(adapter, &esw_cfg[pci_func])) return QL_STATUS_INVALID_PARAM; @@ -732,6 +744,9 @@ static ssize_t qlcnic_sysfs_read_npar_config(struct file *file, if (ret) return ret; + if (!adapter->npars[i].eswitch_status) + continue; + np_cfg[i].pci_func = i; np_cfg[i].op_mode = (u8)nic_info.op_mode; np_cfg[i].port_num = nic_info.phys_port; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index c3570764f58f..06d8f62626e4 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2606,7 +2606,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) struct resource *res; struct net_device *ndev = NULL; struct sh_eth_private *mdp = NULL; - struct sh_eth_plat_data *pd = pdev->dev.platform_data; + struct sh_eth_plat_data *pd = dev_get_platdata(&pdev->dev); const struct platform_device_id *id = platform_get_device_id(pdev); /* get base addr */ @@ -2639,9 +2639,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev) SET_NETDEV_DEV(ndev, &pdev->dev); - /* Fill in the fields of the device structure with ethernet values. */ - ether_setup(ndev); - mdp = netdev_priv(ndev); mdp->num_tx_ring = TX_RING_SIZE; mdp->num_rx_ring = RX_RING_SIZE; diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 856e523ac936..c76571886011 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -721,7 +721,7 @@ static const struct net_device_ops sgiseeq_netdev_ops = { static int sgiseeq_probe(struct platform_device *pdev) { - struct sgiseeq_platform_data *pd = pdev->dev.platform_data; + struct sgiseeq_platform_data *pd = dev_get_platdata(&pdev->dev); struct hpc3_regs *hpcregs = pd->hpc; struct sgiseeq_init_block *sr; unsigned int irq = pd->irq; diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 4136ccc4a954..8b7152565c5e 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -1,5 +1,5 @@ config SFC - tristate "Solarflare SFC4000/SFC9000-family support" + tristate "Solarflare SFC4000/SFC9000/SFC9100-family support" depends on PCI select MDIO select CRC32 @@ -8,12 +8,13 @@ config SFC select PTP_1588_CLOCK ---help--- This driver supports 10-gigabit Ethernet cards based on - the Solarflare SFC4000 and SFC9000-family controllers. + the Solarflare SFC4000, SFC9000-family and SFC9100-family + controllers. To compile this driver as a module, choose M here. The module will be called sfc. config SFC_MTD - bool "Solarflare SFC4000/SFC9000-family MTD support" + bool "Solarflare SFC4000/SFC9000/SFC9100-family MTD support" depends on SFC && MTD && !(SFC=y && MTD=m) default y ---help--- @@ -21,7 +22,7 @@ config SFC_MTD (e.g. /dev/mtd1). This is required to update the firmware or the boot configuration under Linux. config SFC_MCDI_MON - bool "Solarflare SFC9000-family hwmon support" + bool "Solarflare SFC9000/SFC9100-family hwmon support" depends on SFC && HWMON && !(SFC=y && HWMON=m) default y ---help--- diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index a61272661a73..3a83c0dca8e6 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -1,5 +1,5 @@ -sfc-y += efx.o nic.o farch.o falcon.o siena.o tx.o rx.o \ - selftest.o ethtool.o qt202x_phy.o mdio_10g.o \ +sfc-y += efx.o nic.o farch.o falcon.o siena.o ef10.o tx.o \ + rx.o selftest.o ethtool.o qt202x_phy.o mdio_10g.o \ tenxpress.o txc43128_phy.o falcon_boards.o \ mcdi.o mcdi_port.o mcdi_mon.o ptp.o sfc-$(CONFIG_SFC_MTD) += mtd.o diff --git a/drivers/net/ethernet/sfc/bitfield.h b/drivers/net/ethernet/sfc/bitfield.h index 5400a33f254f..17d83f37fbf2 100644 --- a/drivers/net/ethernet/sfc/bitfield.h +++ b/drivers/net/ethernet/sfc/bitfield.h @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2009 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -29,6 +29,10 @@ /* Lowest bit numbers and widths */ #define EFX_DUMMY_FIELD_LBN 0 #define EFX_DUMMY_FIELD_WIDTH 0 +#define EFX_WORD_0_LBN 0 +#define EFX_WORD_0_WIDTH 16 +#define EFX_WORD_1_LBN 16 +#define EFX_WORD_1_WIDTH 16 #define EFX_DWORD_0_LBN 0 #define EFX_DWORD_0_WIDTH 32 #define EFX_DWORD_1_LBN 32 diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c new file mode 100644 index 000000000000..5f42313b4965 --- /dev/null +++ b/drivers/net/ethernet/sfc/ef10.c @@ -0,0 +1,3043 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2012-2013 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include "ef10_regs.h" +#include "io.h" +#include "mcdi.h" +#include "mcdi_pcol.h" +#include "nic.h" +#include "workarounds.h" +#include <linux/in.h> +#include <linux/jhash.h> +#include <linux/wait.h> +#include <linux/workqueue.h> + +/* Hardware control for EF10 architecture including 'Huntington'. */ + +#define EFX_EF10_DRVGEN_EV 7 +enum { + EFX_EF10_TEST = 1, + EFX_EF10_REFILL, +}; + +/* The reserved RSS context value */ +#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff + +/* The filter table(s) are managed by firmware and we have write-only + * access. When removing filters we must identify them to the + * firmware by a 64-bit handle, but this is too wide for Linux kernel + * interfaces (32-bit for RX NFC, 16-bit for RFS). Also, we need to + * be able to tell in advance whether a requested insertion will + * replace an existing filter. Therefore we maintain a software hash + * table, which should be at least as large as the hardware hash + * table. + * + * Huntington has a single 8K filter table shared between all filter + * types and both ports. + */ +#define HUNT_FILTER_TBL_ROWS 8192 + +struct efx_ef10_filter_table { +/* The RX match field masks supported by this fw & hw, in order of priority */ + enum efx_filter_match_flags rx_match_flags[ + MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM]; + unsigned int rx_match_count; + + struct { + unsigned long spec; /* pointer to spec plus flag bits */ +/* BUSY flag indicates that an update is in progress. STACK_OLD is + * used to mark and sweep stack-owned MAC filters. + */ +#define EFX_EF10_FILTER_FLAG_BUSY 1UL +#define EFX_EF10_FILTER_FLAG_STACK_OLD 2UL +#define EFX_EF10_FILTER_FLAGS 3UL + u64 handle; /* firmware handle */ + } *entry; + wait_queue_head_t waitq; +/* Shadow of net_device address lists, guarded by mac_lock */ +#define EFX_EF10_FILTER_STACK_UC_MAX 32 +#define EFX_EF10_FILTER_STACK_MC_MAX 256 + struct { + u8 addr[ETH_ALEN]; + u16 id; + } stack_uc_list[EFX_EF10_FILTER_STACK_UC_MAX], + stack_mc_list[EFX_EF10_FILTER_STACK_MC_MAX]; + int stack_uc_count; /* negative for PROMISC */ + int stack_mc_count; /* negative for PROMISC/ALLMULTI */ +}; + +/* An arbitrary search limit for the software hash table */ +#define EFX_EF10_FILTER_SEARCH_LIMIT 200 + +static void efx_ef10_rx_push_indir_table(struct efx_nic *efx); +static void efx_ef10_rx_free_indir_table(struct efx_nic *efx); +static void efx_ef10_filter_table_remove(struct efx_nic *efx); + +static int efx_ef10_get_warm_boot_count(struct efx_nic *efx) +{ + efx_dword_t reg; + + efx_readd(efx, ®, ER_DZ_BIU_MC_SFT_STATUS); + return EFX_DWORD_FIELD(reg, EFX_WORD_1) == 0xb007 ? + EFX_DWORD_FIELD(reg, EFX_WORD_0) : -EIO; +} + +static unsigned int efx_ef10_mem_map_size(struct efx_nic *efx) +{ + return resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]); +} + +static int efx_ef10_init_capabilities(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_CAPABILITIES_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_CAPABILITIES, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + if (outlen >= sizeof(outbuf)) { + nic_data->datapath_caps = + MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1); + if (!(nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) { + netif_err(efx, drv, efx->net_dev, + "Capabilities don't indicate TSO support.\n"); + return -ENODEV; + } + } + + return 0; +} + +static int efx_ef10_get_sysclk_freq(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CLOCK_OUT_LEN); + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_CLOCK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + return rc; + rc = MCDI_DWORD(outbuf, GET_CLOCK_OUT_SYS_FREQ); + return rc > 0 ? rc : -ERANGE; +} + +static int efx_ef10_get_mac_address(struct efx_nic *efx, u8 *mac_address) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_MAC_ADDRESSES_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_MAC_ADDRESSES, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_GET_MAC_ADDRESSES_OUT_LEN) + return -EIO; + + memcpy(mac_address, + MCDI_PTR(outbuf, GET_MAC_ADDRESSES_OUT_MAC_ADDR_BASE), ETH_ALEN); + return 0; +} + +static int efx_ef10_probe(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data; + int i, rc; + + /* We can have one VI for each 8K region. However we need + * multiple TX queues per channel. + */ + efx->max_channels = + min_t(unsigned int, + EFX_MAX_CHANNELS, + resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]) / + (EFX_VI_PAGE_SIZE * EFX_TXQ_TYPES)); + BUG_ON(efx->max_channels == 0); + + nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL); + if (!nic_data) + return -ENOMEM; + efx->nic_data = nic_data; + + rc = efx_nic_alloc_buffer(efx, &nic_data->mcdi_buf, + 8 + MCDI_CTL_SDU_LEN_MAX_V2, GFP_KERNEL); + if (rc) + goto fail1; + + /* Get the MC's warm boot count. In case it's rebooting right + * now, be prepared to retry. + */ + i = 0; + for (;;) { + rc = efx_ef10_get_warm_boot_count(efx); + if (rc >= 0) + break; + if (++i == 5) + goto fail2; + ssleep(1); + } + nic_data->warm_boot_count = rc; + + nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; + + /* In case we're recovering from a crash (kexec), we want to + * cancel any outstanding request by the previous user of this + * function. We send a special message using the least + * significant bits of the 'high' (doorbell) register. + */ + _efx_writed(efx, cpu_to_le32(1), ER_DZ_MC_DB_HWRD); + + rc = efx_mcdi_init(efx); + if (rc) + goto fail2; + + /* Reset (most) configuration for this function */ + rc = efx_mcdi_reset(efx, RESET_TYPE_ALL); + if (rc) + goto fail3; + + /* Enable event logging */ + rc = efx_mcdi_log_ctrl(efx, true, false, 0); + if (rc) + goto fail3; + + rc = efx_ef10_init_capabilities(efx); + if (rc < 0) + goto fail3; + + efx->rx_packet_len_offset = + ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE; + + if (!(nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_PREFIX_LEN_14_LBN))) { + netif_err(efx, probe, efx->net_dev, + "current firmware does not support an RX prefix\n"); + rc = -ENODEV; + goto fail3; + } + + rc = efx_mcdi_port_get_number(efx); + if (rc < 0) + goto fail3; + efx->port_num = rc; + + rc = efx_ef10_get_mac_address(efx, efx->net_dev->perm_addr); + if (rc) + goto fail3; + + rc = efx_ef10_get_sysclk_freq(efx); + if (rc < 0) + goto fail3; + efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */ + + /* Check whether firmware supports bug 35388 workaround */ + rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true); + if (rc == 0) + nic_data->workaround_35388 = true; + else if (rc != -ENOSYS && rc != -ENOENT) + goto fail3; + netif_dbg(efx, probe, efx->net_dev, + "workaround for bug 35388 is %sabled\n", + nic_data->workaround_35388 ? "en" : "dis"); + + rc = efx_mcdi_mon_probe(efx); + if (rc) + goto fail3; + + efx_ptp_probe(efx); + + return 0; + +fail3: + efx_mcdi_fini(efx); +fail2: + efx_nic_free_buffer(efx, &nic_data->mcdi_buf); +fail1: + kfree(nic_data); + efx->nic_data = NULL; + return rc; +} + +static int efx_ef10_free_vis(struct efx_nic *efx) +{ + int rc = efx_mcdi_rpc(efx, MC_CMD_FREE_VIS, NULL, 0, NULL, 0, NULL); + + /* -EALREADY means nothing to free, so ignore */ + if (rc == -EALREADY) + rc = 0; + return rc; +} + +static void efx_ef10_remove(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + efx_mcdi_mon_remove(efx); + + /* This needs to be after efx_ptp_remove_channel() with no filters */ + efx_ef10_rx_free_indir_table(efx); + + rc = efx_ef10_free_vis(efx); + WARN_ON(rc != 0); + + efx_mcdi_fini(efx); + efx_nic_free_buffer(efx, &nic_data->mcdi_buf); + kfree(nic_data); +} + +static int efx_ef10_alloc_vis(struct efx_nic *efx, + unsigned int min_vis, unsigned int max_vis) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis); + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis); + rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc != 0) + return rc; + + if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN) + return -EIO; + + netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n", + MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE)); + + nic_data->vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE); + nic_data->n_allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT); + return 0; +} + +static int efx_ef10_dimension_resources(struct efx_nic *efx) +{ + unsigned int n_vis = + max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); + + return efx_ef10_alloc_vis(efx, n_vis, n_vis); +} + +static int efx_ef10_init_nic(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (nic_data->must_realloc_vis) { + /* We cannot let the number of VIs change now */ + rc = efx_ef10_alloc_vis(efx, nic_data->n_allocated_vis, + nic_data->n_allocated_vis); + if (rc) + return rc; + nic_data->must_realloc_vis = false; + } + + efx_ef10_rx_push_indir_table(efx); + return 0; +} + +static int efx_ef10_map_reset_flags(u32 *flags) +{ + enum { + EF10_RESET_PORT = ((ETH_RESET_MAC | ETH_RESET_PHY) << + ETH_RESET_SHARED_SHIFT), + EF10_RESET_MC = ((ETH_RESET_DMA | ETH_RESET_FILTER | + ETH_RESET_OFFLOAD | ETH_RESET_MAC | + ETH_RESET_PHY | ETH_RESET_MGMT) << + ETH_RESET_SHARED_SHIFT) + }; + + /* We assume for now that our PCI function is permitted to + * reset everything. + */ + + if ((*flags & EF10_RESET_MC) == EF10_RESET_MC) { + *flags &= ~EF10_RESET_MC; + return RESET_TYPE_WORLD; + } + + if ((*flags & EF10_RESET_PORT) == EF10_RESET_PORT) { + *flags &= ~EF10_RESET_PORT; + return RESET_TYPE_ALL; + } + + /* no invisible reset implemented */ + + return -EINVAL; +} + +#define EF10_DMA_STAT(ext_name, mcdi_name) \ + [EF10_STAT_ ## ext_name] = \ + { #ext_name, 64, 8 * MC_CMD_MAC_ ## mcdi_name } +#define EF10_DMA_INVIS_STAT(int_name, mcdi_name) \ + [EF10_STAT_ ## int_name] = \ + { NULL, 64, 8 * MC_CMD_MAC_ ## mcdi_name } +#define EF10_OTHER_STAT(ext_name) \ + [EF10_STAT_ ## ext_name] = { #ext_name, 0, 0 } + +static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { + EF10_DMA_STAT(tx_bytes, TX_BYTES), + EF10_DMA_STAT(tx_packets, TX_PKTS), + EF10_DMA_STAT(tx_pause, TX_PAUSE_PKTS), + EF10_DMA_STAT(tx_control, TX_CONTROL_PKTS), + EF10_DMA_STAT(tx_unicast, TX_UNICAST_PKTS), + EF10_DMA_STAT(tx_multicast, TX_MULTICAST_PKTS), + EF10_DMA_STAT(tx_broadcast, TX_BROADCAST_PKTS), + EF10_DMA_STAT(tx_lt64, TX_LT64_PKTS), + EF10_DMA_STAT(tx_64, TX_64_PKTS), + EF10_DMA_STAT(tx_65_to_127, TX_65_TO_127_PKTS), + EF10_DMA_STAT(tx_128_to_255, TX_128_TO_255_PKTS), + EF10_DMA_STAT(tx_256_to_511, TX_256_TO_511_PKTS), + EF10_DMA_STAT(tx_512_to_1023, TX_512_TO_1023_PKTS), + EF10_DMA_STAT(tx_1024_to_15xx, TX_1024_TO_15XX_PKTS), + EF10_DMA_STAT(tx_15xx_to_jumbo, TX_15XX_TO_JUMBO_PKTS), + EF10_DMA_STAT(rx_bytes, RX_BYTES), + EF10_DMA_INVIS_STAT(rx_bytes_minus_good_bytes, RX_BAD_BYTES), + EF10_OTHER_STAT(rx_good_bytes), + EF10_OTHER_STAT(rx_bad_bytes), + EF10_DMA_STAT(rx_packets, RX_PKTS), + EF10_DMA_STAT(rx_good, RX_GOOD_PKTS), + EF10_DMA_STAT(rx_bad, RX_BAD_FCS_PKTS), + EF10_DMA_STAT(rx_pause, RX_PAUSE_PKTS), + EF10_DMA_STAT(rx_control, RX_CONTROL_PKTS), + EF10_DMA_STAT(rx_unicast, RX_UNICAST_PKTS), + EF10_DMA_STAT(rx_multicast, RX_MULTICAST_PKTS), + EF10_DMA_STAT(rx_broadcast, RX_BROADCAST_PKTS), + EF10_DMA_STAT(rx_lt64, RX_UNDERSIZE_PKTS), + EF10_DMA_STAT(rx_64, RX_64_PKTS), + EF10_DMA_STAT(rx_65_to_127, RX_65_TO_127_PKTS), + EF10_DMA_STAT(rx_128_to_255, RX_128_TO_255_PKTS), + EF10_DMA_STAT(rx_256_to_511, RX_256_TO_511_PKTS), + EF10_DMA_STAT(rx_512_to_1023, RX_512_TO_1023_PKTS), + EF10_DMA_STAT(rx_1024_to_15xx, RX_1024_TO_15XX_PKTS), + EF10_DMA_STAT(rx_15xx_to_jumbo, RX_15XX_TO_JUMBO_PKTS), + EF10_DMA_STAT(rx_gtjumbo, RX_GTJUMBO_PKTS), + EF10_DMA_STAT(rx_bad_gtjumbo, RX_JABBER_PKTS), + EF10_DMA_STAT(rx_overflow, RX_OVERFLOW_PKTS), + EF10_DMA_STAT(rx_align_error, RX_ALIGN_ERROR_PKTS), + EF10_DMA_STAT(rx_length_error, RX_LENGTH_ERROR_PKTS), + EF10_DMA_STAT(rx_nodesc_drops, RX_NODESC_DROPS), +}; + +#define HUNT_COMMON_STAT_MASK ((1ULL << EF10_STAT_tx_bytes) | \ + (1ULL << EF10_STAT_tx_packets) | \ + (1ULL << EF10_STAT_tx_pause) | \ + (1ULL << EF10_STAT_tx_unicast) | \ + (1ULL << EF10_STAT_tx_multicast) | \ + (1ULL << EF10_STAT_tx_broadcast) | \ + (1ULL << EF10_STAT_rx_bytes) | \ + (1ULL << EF10_STAT_rx_bytes_minus_good_bytes) | \ + (1ULL << EF10_STAT_rx_good_bytes) | \ + (1ULL << EF10_STAT_rx_bad_bytes) | \ + (1ULL << EF10_STAT_rx_packets) | \ + (1ULL << EF10_STAT_rx_good) | \ + (1ULL << EF10_STAT_rx_bad) | \ + (1ULL << EF10_STAT_rx_pause) | \ + (1ULL << EF10_STAT_rx_control) | \ + (1ULL << EF10_STAT_rx_unicast) | \ + (1ULL << EF10_STAT_rx_multicast) | \ + (1ULL << EF10_STAT_rx_broadcast) | \ + (1ULL << EF10_STAT_rx_lt64) | \ + (1ULL << EF10_STAT_rx_64) | \ + (1ULL << EF10_STAT_rx_65_to_127) | \ + (1ULL << EF10_STAT_rx_128_to_255) | \ + (1ULL << EF10_STAT_rx_256_to_511) | \ + (1ULL << EF10_STAT_rx_512_to_1023) | \ + (1ULL << EF10_STAT_rx_1024_to_15xx) | \ + (1ULL << EF10_STAT_rx_15xx_to_jumbo) | \ + (1ULL << EF10_STAT_rx_gtjumbo) | \ + (1ULL << EF10_STAT_rx_bad_gtjumbo) | \ + (1ULL << EF10_STAT_rx_overflow) | \ + (1ULL << EF10_STAT_rx_nodesc_drops)) + +/* These statistics are only provided by the 10G MAC. For a 10G/40G + * switchable port we do not expose these because they might not + * include all the packets they should. + */ +#define HUNT_10G_ONLY_STAT_MASK ((1ULL << EF10_STAT_tx_control) | \ + (1ULL << EF10_STAT_tx_lt64) | \ + (1ULL << EF10_STAT_tx_64) | \ + (1ULL << EF10_STAT_tx_65_to_127) | \ + (1ULL << EF10_STAT_tx_128_to_255) | \ + (1ULL << EF10_STAT_tx_256_to_511) | \ + (1ULL << EF10_STAT_tx_512_to_1023) | \ + (1ULL << EF10_STAT_tx_1024_to_15xx) | \ + (1ULL << EF10_STAT_tx_15xx_to_jumbo)) + +/* These statistics are only provided by the 40G MAC. For a 10G/40G + * switchable port we do expose these because the errors will otherwise + * be silent. + */ +#define HUNT_40G_EXTRA_STAT_MASK ((1ULL << EF10_STAT_rx_align_error) | \ + (1ULL << EF10_STAT_rx_length_error)) + +#if BITS_PER_LONG == 64 +#define STAT_MASK_BITMAP(bits) (bits) +#else +#define STAT_MASK_BITMAP(bits) (bits) & 0xffffffff, (bits) >> 32 +#endif + +static const unsigned long *efx_ef10_stat_mask(struct efx_nic *efx) +{ + static const unsigned long hunt_40g_stat_mask[] = { + STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK | + HUNT_40G_EXTRA_STAT_MASK) + }; + static const unsigned long hunt_10g_only_stat_mask[] = { + STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK | + HUNT_10G_ONLY_STAT_MASK) + }; + u32 port_caps = efx_mcdi_phy_get_caps(efx); + + if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + return hunt_40g_stat_mask; + else + return hunt_10g_only_stat_mask; +} + +static size_t efx_ef10_describe_stats(struct efx_nic *efx, u8 *names) +{ + return efx_nic_describe_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, + efx_ef10_stat_mask(efx), names); +} + +static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + const unsigned long *stats_mask = efx_ef10_stat_mask(efx); + __le64 generation_start, generation_end; + u64 *stats = nic_data->stats; + __le64 *dma_stats; + + dma_stats = efx->stats_buffer.addr; + nic_data = efx->nic_data; + + generation_end = dma_stats[MC_CMD_MAC_GENERATION_END]; + if (generation_end == EFX_MC_STATS_GENERATION_INVALID) + return 0; + rmb(); + efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, stats_mask, + stats, efx->stats_buffer.addr, false); + generation_start = dma_stats[MC_CMD_MAC_GENERATION_START]; + if (generation_end != generation_start) + return -EAGAIN; + + /* Update derived statistics */ + stats[EF10_STAT_rx_good_bytes] = + stats[EF10_STAT_rx_bytes] - + stats[EF10_STAT_rx_bytes_minus_good_bytes]; + efx_update_diff_stat(&stats[EF10_STAT_rx_bad_bytes], + stats[EF10_STAT_rx_bytes_minus_good_bytes]); + + return 0; +} + + +static size_t efx_ef10_update_stats(struct efx_nic *efx, u64 *full_stats, + struct rtnl_link_stats64 *core_stats) +{ + const unsigned long *mask = efx_ef10_stat_mask(efx); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u64 *stats = nic_data->stats; + size_t stats_count = 0, index; + int retry; + + /* If we're unlucky enough to read statistics during the DMA, wait + * up to 10ms for it to finish (typically takes <500us) + */ + for (retry = 0; retry < 100; ++retry) { + if (efx_ef10_try_update_nic_stats(efx) == 0) + break; + udelay(100); + } + + if (full_stats) { + for_each_set_bit(index, mask, EF10_STAT_COUNT) { + if (efx_ef10_stat_desc[index].name) { + *full_stats++ = stats[index]; + ++stats_count; + } + } + } + + if (core_stats) { + core_stats->rx_packets = stats[EF10_STAT_rx_packets]; + core_stats->tx_packets = stats[EF10_STAT_tx_packets]; + core_stats->rx_bytes = stats[EF10_STAT_rx_bytes]; + core_stats->tx_bytes = stats[EF10_STAT_tx_bytes]; + core_stats->rx_dropped = stats[EF10_STAT_rx_nodesc_drops]; + core_stats->multicast = stats[EF10_STAT_rx_multicast]; + core_stats->rx_length_errors = + stats[EF10_STAT_rx_gtjumbo] + + stats[EF10_STAT_rx_length_error]; + core_stats->rx_crc_errors = stats[EF10_STAT_rx_bad]; + core_stats->rx_frame_errors = stats[EF10_STAT_rx_align_error]; + core_stats->rx_fifo_errors = stats[EF10_STAT_rx_overflow]; + core_stats->rx_errors = (core_stats->rx_length_errors + + core_stats->rx_crc_errors + + core_stats->rx_frame_errors); + } + + return stats_count; +} + +static void efx_ef10_push_irq_moderation(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + unsigned int mode, value; + efx_dword_t timer_cmd; + + if (channel->irq_moderation) { + mode = 3; + value = channel->irq_moderation - 1; + } else { + mode = 0; + value = 0; + } + + if (EFX_EF10_WORKAROUND_35388(efx)) { + EFX_POPULATE_DWORD_3(timer_cmd, ERF_DD_EVQ_IND_TIMER_FLAGS, + EFE_DD_EVQ_IND_TIMER_FLAGS, + ERF_DD_EVQ_IND_TIMER_MODE, mode, + ERF_DD_EVQ_IND_TIMER_VAL, value); + efx_writed_page(efx, &timer_cmd, ER_DD_EVQ_INDIRECT, + channel->channel); + } else { + EFX_POPULATE_DWORD_2(timer_cmd, ERF_DZ_TC_TIMER_MODE, mode, + ERF_DZ_TC_TIMER_VAL, value); + efx_writed_page(efx, &timer_cmd, ER_DZ_EVQ_TMR, + channel->channel); + } +} + +static void efx_ef10_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol) +{ + wol->supported = 0; + wol->wolopts = 0; + memset(&wol->sopass, 0, sizeof(wol->sopass)); +} + +static int efx_ef10_set_wol(struct efx_nic *efx, u32 type) +{ + if (type != 0) + return -EINVAL; + return 0; +} + +static void efx_ef10_mcdi_request(struct efx_nic *efx, + const efx_dword_t *hdr, size_t hdr_len, + const efx_dword_t *sdu, size_t sdu_len) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u8 *pdu = nic_data->mcdi_buf.addr; + + memcpy(pdu, hdr, hdr_len); + memcpy(pdu + hdr_len, sdu, sdu_len); + wmb(); + + /* The hardware provides 'low' and 'high' (doorbell) registers + * for passing the 64-bit address of an MCDI request to + * firmware. However the dwords are swapped by firmware. The + * least significant bits of the doorbell are then 0 for all + * MCDI requests due to alignment. + */ + _efx_writed(efx, cpu_to_le32((u64)nic_data->mcdi_buf.dma_addr >> 32), + ER_DZ_MC_DB_LWRD); + _efx_writed(efx, cpu_to_le32((u32)nic_data->mcdi_buf.dma_addr), + ER_DZ_MC_DB_HWRD); +} + +static bool efx_ef10_mcdi_poll_response(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + const efx_dword_t hdr = *(const efx_dword_t *)nic_data->mcdi_buf.addr; + + rmb(); + return EFX_DWORD_FIELD(hdr, MCDI_HEADER_RESPONSE); +} + +static void +efx_ef10_mcdi_read_response(struct efx_nic *efx, efx_dword_t *outbuf, + size_t offset, size_t outlen) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + const u8 *pdu = nic_data->mcdi_buf.addr; + + memcpy(outbuf, pdu + offset, outlen); +} + +static int efx_ef10_mcdi_poll_reboot(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + rc = efx_ef10_get_warm_boot_count(efx); + if (rc < 0) { + /* The firmware is presumably in the process of + * rebooting. However, we are supposed to report each + * reboot just once, so we must only do that once we + * can read and store the updated warm boot count. + */ + return 0; + } + + if (rc == nic_data->warm_boot_count) + return 0; + + nic_data->warm_boot_count = rc; + + /* All our allocations have been reset */ + nic_data->must_realloc_vis = true; + nic_data->must_restore_filters = true; + nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; + + return -EIO; +} + +/* Handle an MSI interrupt + * + * Handle an MSI hardware interrupt. This routine schedules event + * queue processing. No interrupt acknowledgement cycle is necessary. + * Also, we never need to check that the interrupt is for us, since + * MSI interrupts cannot be shared. + */ +static irqreturn_t efx_ef10_msi_interrupt(int irq, void *dev_id) +{ + struct efx_msi_context *context = dev_id; + struct efx_nic *efx = context->efx; + + netif_vdbg(efx, intr, efx->net_dev, + "IRQ %d on CPU %d\n", irq, raw_smp_processor_id()); + + if (likely(ACCESS_ONCE(efx->irq_soft_enabled))) { + /* Note test interrupts */ + if (context->index == efx->irq_level) + efx->last_irq_cpu = raw_smp_processor_id(); + + /* Schedule processing of the channel */ + efx_schedule_channel_irq(efx->channel[context->index]); + } + + return IRQ_HANDLED; +} + +static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id) +{ + struct efx_nic *efx = dev_id; + bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled); + struct efx_channel *channel; + efx_dword_t reg; + u32 queues; + + /* Read the ISR which also ACKs the interrupts */ + efx_readd(efx, ®, ER_DZ_BIU_INT_ISR); + queues = EFX_DWORD_FIELD(reg, ERF_DZ_ISR_REG); + + if (queues == 0) + return IRQ_NONE; + + if (likely(soft_enabled)) { + /* Note test interrupts */ + if (queues & (1U << efx->irq_level)) + efx->last_irq_cpu = raw_smp_processor_id(); + + efx_for_each_channel(channel, efx) { + if (queues & 1) + efx_schedule_channel_irq(channel); + queues >>= 1; + } + } + + netif_vdbg(efx, intr, efx->net_dev, + "IRQ %d on CPU %d status " EFX_DWORD_FMT "\n", + irq, raw_smp_processor_id(), EFX_DWORD_VAL(reg)); + + return IRQ_HANDLED; +} + +static void efx_ef10_irq_test_generate(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_TRIGGER_INTERRUPT_IN_LEN); + + BUILD_BUG_ON(MC_CMD_TRIGGER_INTERRUPT_OUT_LEN != 0); + + MCDI_SET_DWORD(inbuf, TRIGGER_INTERRUPT_IN_INTR_LEVEL, efx->irq_level); + (void) efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT, + inbuf, sizeof(inbuf), NULL, 0, NULL); +} + +static int efx_ef10_tx_probe(struct efx_tx_queue *tx_queue) +{ + return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf, + (tx_queue->ptr_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +/* This writes to the TX_DESC_WPTR and also pushes data */ +static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue, + const efx_qword_t *txd) +{ + unsigned int write_ptr; + efx_oword_t reg; + + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + EFX_POPULATE_OWORD_1(reg, ERF_DZ_TX_DESC_WPTR, write_ptr); + reg.qword[0] = *txd; + efx_writeo_page(tx_queue->efx, ®, + ER_DZ_TX_DESC_UPD, tx_queue->queue); +} + +static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / + EFX_BUF_SIZE)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_TXQ_OUT_LEN); + bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD; + size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; + struct efx_channel *channel = tx_queue->channel; + struct efx_nic *efx = tx_queue->efx; + size_t inlen, outlen; + dma_addr_t dma_addr; + efx_qword_t *txd; + int rc; + int i; + + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue); + MCDI_POPULATE_DWORD_2(inbuf, INIT_TXQ_IN_FLAGS, + INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, + INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + + dma_addr = tx_queue->txd.buf.dma_addr; + + netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n", + tx_queue->queue, entries, (u64)dma_addr); + + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + /* A previous user of this TX queue might have set us up the + * bomb by writing a descriptor to the TX push collector but + * not the doorbell. (Each collector belongs to a port, not a + * queue or function, so cannot easily be reset.) We must + * attempt to push a no-op descriptor in its place. + */ + tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION; + tx_queue->insert_count = 1; + txd = efx_tx_desc(tx_queue, 0); + EFX_POPULATE_QWORD_4(*txd, + ESF_DZ_TX_DESC_IS_OPT, true, + ESF_DZ_TX_OPTION_TYPE, + ESE_DZ_TX_OPTION_DESC_CRC_CSUM, + ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload, + ESF_DZ_TX_OPTION_IP_CSUM, csum_offload); + tx_queue->write_count = 1; + wmb(); + efx_ef10_push_tx_desc(tx_queue, txd); + + return; + +fail: + WARN_ON(true); + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); +} + +static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_TXQ_OUT_LEN); + struct efx_nic *efx = tx_queue->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE, + tx_queue->queue); + + rc = efx_mcdi_rpc(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); +} + +static void efx_ef10_tx_remove(struct efx_tx_queue *tx_queue) +{ + efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf); +} + +/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */ +static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue) +{ + unsigned int write_ptr; + efx_dword_t reg; + + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + EFX_POPULATE_DWORD_1(reg, ERF_DZ_TX_DESC_WPTR_DWORD, write_ptr); + efx_writed_page(tx_queue->efx, ®, + ER_DZ_TX_DESC_UPD_DWORD, tx_queue->queue); +} + +static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) +{ + unsigned int old_write_count = tx_queue->write_count; + struct efx_tx_buffer *buffer; + unsigned int write_ptr; + efx_qword_t *txd; + + BUG_ON(tx_queue->write_count == tx_queue->insert_count); + + do { + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + buffer = &tx_queue->buffer[write_ptr]; + txd = efx_tx_desc(tx_queue, write_ptr); + ++tx_queue->write_count; + + /* Create TX descriptor ring entry */ + if (buffer->flags & EFX_TX_BUF_OPTION) { + *txd = buffer->option; + } else { + BUILD_BUG_ON(EFX_TX_BUF_CONT != 1); + EFX_POPULATE_QWORD_3( + *txd, + ESF_DZ_TX_KER_CONT, + buffer->flags & EFX_TX_BUF_CONT, + ESF_DZ_TX_KER_BYTE_CNT, buffer->len, + ESF_DZ_TX_KER_BUF_ADDR, buffer->dma_addr); + } + } while (tx_queue->write_count != tx_queue->insert_count); + + wmb(); /* Ensure descriptors are written before they are fetched */ + + if (efx_nic_may_push_tx_desc(tx_queue, old_write_count)) { + txd = efx_tx_desc(tx_queue, + old_write_count & tx_queue->ptr_mask); + efx_ef10_push_tx_desc(tx_queue, txd); + ++tx_queue->pushes; + } else { + efx_ef10_notify_tx_desc(tx_queue); + } +} + +static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID, + EVB_PORT_ID_ASSIGNED); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, + MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, + EFX_MAX_CHANNELS); + + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc != 0) + return rc; + + if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN) + return -EIO; + + *context = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID); + + return 0; +} + +static void efx_ef10_free_rss_context(struct efx_nic *efx, u32 context) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN); + int rc; + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID, + context); + + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); + WARN_ON(rc != 0); +} + +static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context) +{ + MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN); + MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN); + int i, rc; + + MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID, + context); + BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) != + MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN); + + for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); ++i) + MCDI_PTR(tablebuf, + RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] = + (u8) efx->rx_indir_table[i]; + + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf, + sizeof(tablebuf), NULL, 0, NULL); + if (rc != 0) + return rc; + + MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID, + context); + BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) != + MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); + for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i) + MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = + efx->rx_hash_key[i]; + + return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf, + sizeof(keybuf), NULL, 0, NULL); +} + +static void efx_ef10_rx_free_indir_table(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID) + efx_ef10_free_rss_context(efx, nic_data->rx_rss_context); + nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; +} + +static void efx_ef10_rx_push_indir_table(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + netif_dbg(efx, drv, efx->net_dev, "pushing RX indirection table\n"); + + if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID) { + rc = efx_ef10_alloc_rss_context(efx, &nic_data->rx_rss_context); + if (rc != 0) + goto fail; + } + + rc = efx_ef10_populate_rss_table(efx, nic_data->rx_rss_context); + if (rc != 0) + goto fail; + + return; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); +} + +static int efx_ef10_rx_probe(struct efx_rx_queue *rx_queue) +{ + return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf, + (rx_queue->ptr_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue) +{ + MCDI_DECLARE_BUF(inbuf, + MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / + EFX_BUF_SIZE)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_RXQ_OUT_LEN); + struct efx_channel *channel = efx_rx_queue_channel(rx_queue); + size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE; + struct efx_nic *efx = rx_queue->efx; + size_t inlen, outlen; + dma_addr_t dma_addr; + int rc; + int i; + + rx_queue->scatter_n = 0; + rx_queue->scatter_len = 0; + + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue)); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE, + efx_rx_queue_index(rx_queue)); + MCDI_POPULATE_DWORD_1(inbuf, INIT_RXQ_IN_FLAGS, + INIT_RXQ_IN_FLAG_PREFIX, 1); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + + dma_addr = rx_queue->rxd.buf.dma_addr; + + netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. %zu entries (%llx)\n", + efx_rx_queue_index(rx_queue), entries, (u64)dma_addr); + + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_RXQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + return; + +fail: + WARN_ON(true); + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); +} + +static void efx_ef10_rx_fini(struct efx_rx_queue *rx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_RXQ_OUT_LEN); + struct efx_nic *efx = rx_queue->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE, + efx_rx_queue_index(rx_queue)); + + rc = efx_mcdi_rpc(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); +} + +static void efx_ef10_rx_remove(struct efx_rx_queue *rx_queue) +{ + efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf); +} + +/* This creates an entry in the RX descriptor queue */ +static inline void +efx_ef10_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index) +{ + struct efx_rx_buffer *rx_buf; + efx_qword_t *rxd; + + rxd = efx_rx_desc(rx_queue, index); + rx_buf = efx_rx_buffer(rx_queue, index); + EFX_POPULATE_QWORD_2(*rxd, + ESF_DZ_RX_KER_BYTE_CNT, rx_buf->len, + ESF_DZ_RX_KER_BUF_ADDR, rx_buf->dma_addr); +} + +static void efx_ef10_rx_write(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int write_count; + efx_dword_t reg; + + /* Firmware requires that RX_DESC_WPTR be a multiple of 8 */ + write_count = rx_queue->added_count & ~7; + if (rx_queue->notified_count == write_count) + return; + + do + efx_ef10_build_rx_desc( + rx_queue, + rx_queue->notified_count & rx_queue->ptr_mask); + while (++rx_queue->notified_count != write_count); + + wmb(); + EFX_POPULATE_DWORD_1(reg, ERF_DZ_RX_DESC_WPTR, + write_count & rx_queue->ptr_mask); + efx_writed_page(efx, ®, ER_DZ_RX_DESC_UPD, + efx_rx_queue_index(rx_queue)); +} + +static efx_mcdi_async_completer efx_ef10_rx_defer_refill_complete; + +static void efx_ef10_rx_defer_refill(struct efx_rx_queue *rx_queue) +{ + struct efx_channel *channel = efx_rx_queue_channel(rx_queue); + MCDI_DECLARE_BUF(inbuf, MC_CMD_DRIVER_EVENT_IN_LEN); + efx_qword_t event; + + EFX_POPULATE_QWORD_2(event, + ESF_DZ_EV_CODE, EFX_EF10_DRVGEN_EV, + ESF_DZ_EV_DATA, EFX_EF10_REFILL); + + MCDI_SET_DWORD(inbuf, DRIVER_EVENT_IN_EVQ, channel->channel); + + /* MCDI_SET_QWORD is not appropriate here since EFX_POPULATE_* has + * already swapped the data to little-endian order. + */ + memcpy(MCDI_PTR(inbuf, DRIVER_EVENT_IN_DATA), &event.u64[0], + sizeof(efx_qword_t)); + + efx_mcdi_rpc_async(channel->efx, MC_CMD_DRIVER_EVENT, + inbuf, sizeof(inbuf), 0, + efx_ef10_rx_defer_refill_complete, 0); +} + +static void +efx_ef10_rx_defer_refill_complete(struct efx_nic *efx, unsigned long cookie, + int rc, efx_dword_t *outbuf, + size_t outlen_actual) +{ + /* nothing to do */ +} + +static int efx_ef10_ev_probe(struct efx_channel *channel) +{ + return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf, + (channel->eventq_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +static int efx_ef10_ev_init(struct efx_channel *channel) +{ + MCDI_DECLARE_BUF(inbuf, + MC_CMD_INIT_EVQ_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / + EFX_BUF_SIZE)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_OUT_LEN); + size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE; + struct efx_nic *efx = channel->efx; + struct efx_ef10_nic_data *nic_data; + bool supports_rx_merge; + size_t inlen, outlen; + dma_addr_t dma_addr; + int rc; + int i; + + nic_data = efx->nic_data; + supports_rx_merge = + !!(nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN); + + /* Fill event queue with all ones (i.e. empty events) */ + memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len); + + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel); + /* INIT_EVQ expects index in vector table, not absolute */ + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel); + MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, + INIT_EVQ_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_IN_FLAG_RX_MERGE, 1, + INIT_EVQ_IN_FLAG_TX_MERGE, 1, + INIT_EVQ_IN_FLAG_CUT_THRU, !supports_rx_merge); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE, + MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE, + MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0); + + dma_addr = channel->eventq.buf.dma_addr; + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_EVQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + /* IRQ return is ignored */ + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +static void efx_ef10_ev_fini(struct efx_channel *channel) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_EVQ_OUT_LEN); + struct efx_nic *efx = channel->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel); + + rc = efx_mcdi_rpc(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); +} + +static void efx_ef10_ev_remove(struct efx_channel *channel) +{ + efx_nic_free_buffer(channel->efx, &channel->eventq.buf); +} + +static void efx_ef10_handle_rx_wrong_queue(struct efx_rx_queue *rx_queue, + unsigned int rx_queue_label) +{ + struct efx_nic *efx = rx_queue->efx; + + netif_info(efx, hw, efx->net_dev, + "rx event arrived on queue %d labeled as queue %u\n", + efx_rx_queue_index(rx_queue), rx_queue_label); + + efx_schedule_reset(efx, RESET_TYPE_DISABLE); +} + +static void +efx_ef10_handle_rx_bad_lbits(struct efx_rx_queue *rx_queue, + unsigned int actual, unsigned int expected) +{ + unsigned int dropped = (actual - expected) & rx_queue->ptr_mask; + struct efx_nic *efx = rx_queue->efx; + + netif_info(efx, hw, efx->net_dev, + "dropped %d events (index=%d expected=%d)\n", + dropped, actual, expected); + + efx_schedule_reset(efx, RESET_TYPE_DISABLE); +} + +/* partially received RX was aborted. clean up. */ +static void efx_ef10_handle_rx_abort(struct efx_rx_queue *rx_queue) +{ + unsigned int rx_desc_ptr; + + WARN_ON(rx_queue->scatter_n == 0); + + netif_dbg(rx_queue->efx, hw, rx_queue->efx->net_dev, + "scattered RX aborted (dropping %u buffers)\n", + rx_queue->scatter_n); + + rx_desc_ptr = rx_queue->removed_count & rx_queue->ptr_mask; + + efx_rx_packet(rx_queue, rx_desc_ptr, rx_queue->scatter_n, + 0, EFX_RX_PKT_DISCARD); + + rx_queue->removed_count += rx_queue->scatter_n; + rx_queue->scatter_n = 0; + rx_queue->scatter_len = 0; + ++efx_rx_queue_channel(rx_queue)->n_rx_nodesc_trunc; +} + +static int efx_ef10_handle_rx_event(struct efx_channel *channel, + const efx_qword_t *event) +{ + unsigned int rx_bytes, next_ptr_lbits, rx_queue_label, rx_l4_class; + unsigned int n_descs, n_packets, i; + struct efx_nic *efx = channel->efx; + struct efx_rx_queue *rx_queue; + bool rx_cont; + u16 flags = 0; + + if (unlikely(ACCESS_ONCE(efx->reset_pending))) + return 0; + + /* Basic packet information */ + rx_bytes = EFX_QWORD_FIELD(*event, ESF_DZ_RX_BYTES); + next_ptr_lbits = EFX_QWORD_FIELD(*event, ESF_DZ_RX_DSC_PTR_LBITS); + rx_queue_label = EFX_QWORD_FIELD(*event, ESF_DZ_RX_QLABEL); + rx_l4_class = EFX_QWORD_FIELD(*event, ESF_DZ_RX_L4_CLASS); + rx_cont = EFX_QWORD_FIELD(*event, ESF_DZ_RX_CONT); + + WARN_ON(EFX_QWORD_FIELD(*event, ESF_DZ_RX_DROP_EVENT)); + + rx_queue = efx_channel_get_rx_queue(channel); + + if (unlikely(rx_queue_label != efx_rx_queue_index(rx_queue))) + efx_ef10_handle_rx_wrong_queue(rx_queue, rx_queue_label); + + n_descs = ((next_ptr_lbits - rx_queue->removed_count) & + ((1 << ESF_DZ_RX_DSC_PTR_LBITS_WIDTH) - 1)); + + if (n_descs != rx_queue->scatter_n + 1) { + /* detect rx abort */ + if (unlikely(n_descs == rx_queue->scatter_n)) { + WARN_ON(rx_bytes != 0); + efx_ef10_handle_rx_abort(rx_queue); + return 0; + } + + if (unlikely(rx_queue->scatter_n != 0)) { + /* Scattered packet completions cannot be + * merged, so something has gone wrong. + */ + efx_ef10_handle_rx_bad_lbits( + rx_queue, next_ptr_lbits, + (rx_queue->removed_count + + rx_queue->scatter_n + 1) & + ((1 << ESF_DZ_RX_DSC_PTR_LBITS_WIDTH) - 1)); + return 0; + } + + /* Merged completion for multiple non-scattered packets */ + rx_queue->scatter_n = 1; + rx_queue->scatter_len = 0; + n_packets = n_descs; + ++channel->n_rx_merge_events; + channel->n_rx_merge_packets += n_packets; + flags |= EFX_RX_PKT_PREFIX_LEN; + } else { + ++rx_queue->scatter_n; + rx_queue->scatter_len += rx_bytes; + if (rx_cont) + return 0; + n_packets = 1; + } + + if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR))) + flags |= EFX_RX_PKT_DISCARD; + + if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR))) { + channel->n_rx_ip_hdr_chksum_err += n_packets; + } else if (unlikely(EFX_QWORD_FIELD(*event, + ESF_DZ_RX_TCPUDP_CKSUM_ERR))) { + channel->n_rx_tcp_udp_chksum_err += n_packets; + } else if (rx_l4_class == ESE_DZ_L4_CLASS_TCP || + rx_l4_class == ESE_DZ_L4_CLASS_UDP) { + flags |= EFX_RX_PKT_CSUMMED; + } + + if (rx_l4_class == ESE_DZ_L4_CLASS_TCP) + flags |= EFX_RX_PKT_TCP; + + channel->irq_mod_score += 2 * n_packets; + + /* Handle received packet(s) */ + for (i = 0; i < n_packets; i++) { + efx_rx_packet(rx_queue, + rx_queue->removed_count & rx_queue->ptr_mask, + rx_queue->scatter_n, rx_queue->scatter_len, + flags); + rx_queue->removed_count += rx_queue->scatter_n; + } + + rx_queue->scatter_n = 0; + rx_queue->scatter_len = 0; + + return n_packets; +} + +static int +efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) +{ + struct efx_nic *efx = channel->efx; + struct efx_tx_queue *tx_queue; + unsigned int tx_ev_desc_ptr; + unsigned int tx_ev_q_label; + int tx_descs = 0; + + if (unlikely(ACCESS_ONCE(efx->reset_pending))) + return 0; + + if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT))) + return 0; + + /* Transmit completion */ + tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX); + tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL); + tx_queue = efx_channel_get_tx_queue(channel, + tx_ev_q_label % EFX_TXQ_TYPES); + tx_descs = ((tx_ev_desc_ptr + 1 - tx_queue->read_count) & + tx_queue->ptr_mask); + efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask); + + return tx_descs; +} + +static void +efx_ef10_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) +{ + struct efx_nic *efx = channel->efx; + int subcode; + + subcode = EFX_QWORD_FIELD(*event, ESF_DZ_DRV_SUB_CODE); + + switch (subcode) { + case ESE_DZ_DRV_TIMER_EV: + case ESE_DZ_DRV_WAKE_UP_EV: + break; + case ESE_DZ_DRV_START_UP_EV: + /* event queue init complete. ok. */ + break; + default: + netif_err(efx, hw, efx->net_dev, + "channel %d unknown driver event type %d" + " (data " EFX_QWORD_FMT ")\n", + channel->channel, subcode, + EFX_QWORD_VAL(*event)); + + } +} + +static void efx_ef10_handle_driver_generated_event(struct efx_channel *channel, + efx_qword_t *event) +{ + struct efx_nic *efx = channel->efx; + u32 subcode; + + subcode = EFX_QWORD_FIELD(*event, EFX_DWORD_0); + + switch (subcode) { + case EFX_EF10_TEST: + channel->event_test_cpu = raw_smp_processor_id(); + break; + case EFX_EF10_REFILL: + /* The queue must be empty, so we won't receive any rx + * events, so efx_process_channel() won't refill the + * queue. Refill it here + */ + efx_fast_push_rx_descriptors(&channel->rx_queue); + break; + default: + netif_err(efx, hw, efx->net_dev, + "channel %d unknown driver event type %u" + " (data " EFX_QWORD_FMT ")\n", + channel->channel, (unsigned) subcode, + EFX_QWORD_VAL(*event)); + } +} + +static int efx_ef10_ev_process(struct efx_channel *channel, int quota) +{ + struct efx_nic *efx = channel->efx; + efx_qword_t event, *p_event; + unsigned int read_ptr; + int ev_code; + int tx_descs = 0; + int spent = 0; + + read_ptr = channel->eventq_read_ptr; + + for (;;) { + p_event = efx_event(channel, read_ptr); + event = *p_event; + + if (!efx_event_present(&event)) + break; + + EFX_SET_QWORD(*p_event); + + ++read_ptr; + + ev_code = EFX_QWORD_FIELD(event, ESF_DZ_EV_CODE); + + netif_vdbg(efx, drv, efx->net_dev, + "processing event on %d " EFX_QWORD_FMT "\n", + channel->channel, EFX_QWORD_VAL(event)); + + switch (ev_code) { + case ESE_DZ_EV_CODE_MCDI_EV: + efx_mcdi_process_event(channel, &event); + break; + case ESE_DZ_EV_CODE_RX_EV: + spent += efx_ef10_handle_rx_event(channel, &event); + if (spent >= quota) { + /* XXX can we split a merged event to + * avoid going over-quota? + */ + spent = quota; + goto out; + } + break; + case ESE_DZ_EV_CODE_TX_EV: + tx_descs += efx_ef10_handle_tx_event(channel, &event); + if (tx_descs > efx->txq_entries) { + spent = quota; + goto out; + } else if (++spent == quota) { + goto out; + } + break; + case ESE_DZ_EV_CODE_DRIVER_EV: + efx_ef10_handle_driver_event(channel, &event); + if (++spent == quota) + goto out; + break; + case EFX_EF10_DRVGEN_EV: + efx_ef10_handle_driver_generated_event(channel, &event); + break; + default: + netif_err(efx, hw, efx->net_dev, + "channel %d unknown event type %d" + " (data " EFX_QWORD_FMT ")\n", + channel->channel, ev_code, + EFX_QWORD_VAL(event)); + } + } + +out: + channel->eventq_read_ptr = read_ptr; + return spent; +} + +static void efx_ef10_ev_read_ack(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + efx_dword_t rptr; + + if (EFX_EF10_WORKAROUND_35388(efx)) { + BUILD_BUG_ON(EFX_MIN_EVQ_SIZE < + (1 << ERF_DD_EVQ_IND_RPTR_WIDTH)); + BUILD_BUG_ON(EFX_MAX_EVQ_SIZE > + (1 << 2 * ERF_DD_EVQ_IND_RPTR_WIDTH)); + + EFX_POPULATE_DWORD_2(rptr, ERF_DD_EVQ_IND_RPTR_FLAGS, + EFE_DD_EVQ_IND_RPTR_FLAGS_HIGH, + ERF_DD_EVQ_IND_RPTR, + (channel->eventq_read_ptr & + channel->eventq_mask) >> + ERF_DD_EVQ_IND_RPTR_WIDTH); + efx_writed_page(efx, &rptr, ER_DD_EVQ_INDIRECT, + channel->channel); + EFX_POPULATE_DWORD_2(rptr, ERF_DD_EVQ_IND_RPTR_FLAGS, + EFE_DD_EVQ_IND_RPTR_FLAGS_LOW, + ERF_DD_EVQ_IND_RPTR, + channel->eventq_read_ptr & + ((1 << ERF_DD_EVQ_IND_RPTR_WIDTH) - 1)); + efx_writed_page(efx, &rptr, ER_DD_EVQ_INDIRECT, + channel->channel); + } else { + EFX_POPULATE_DWORD_1(rptr, ERF_DZ_EVQ_RPTR, + channel->eventq_read_ptr & + channel->eventq_mask); + efx_writed_page(efx, &rptr, ER_DZ_EVQ_RPTR, channel->channel); + } +} + +static void efx_ef10_ev_test_generate(struct efx_channel *channel) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_DRIVER_EVENT_IN_LEN); + struct efx_nic *efx = channel->efx; + efx_qword_t event; + int rc; + + EFX_POPULATE_QWORD_2(event, + ESF_DZ_EV_CODE, EFX_EF10_DRVGEN_EV, + ESF_DZ_EV_DATA, EFX_EF10_TEST); + + MCDI_SET_DWORD(inbuf, DRIVER_EVENT_IN_EVQ, channel->channel); + + /* MCDI_SET_QWORD is not appropriate here since EFX_POPULATE_* has + * already swapped the data to little-endian order. + */ + memcpy(MCDI_PTR(inbuf, DRIVER_EVENT_IN_DATA), &event.u64[0], + sizeof(efx_qword_t)); + + rc = efx_mcdi_rpc(efx, MC_CMD_DRIVER_EVENT, inbuf, sizeof(inbuf), + NULL, 0, NULL); + if (rc != 0) + goto fail; + + return; + +fail: + WARN_ON(true); + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); +} + +void efx_ef10_handle_drain_event(struct efx_nic *efx) +{ + if (atomic_dec_and_test(&efx->active_queues)) + wake_up(&efx->flush_wq); + + WARN_ON(atomic_read(&efx->active_queues) < 0); +} + +static int efx_ef10_fini_dmaq(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int pending; + + /* If the MC has just rebooted, the TX/RX queues will have already been + * torn down, but efx->active_queues needs to be set to zero. + */ + if (nic_data->must_realloc_vis) { + atomic_set(&efx->active_queues, 0); + return 0; + } + + /* Do not attempt to write to the NIC during EEH recovery */ + if (efx->state != STATE_RECOVERY) { + efx_for_each_channel(channel, efx) { + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_ef10_rx_fini(rx_queue); + efx_for_each_channel_tx_queue(tx_queue, channel) + efx_ef10_tx_fini(tx_queue); + } + + wait_event_timeout(efx->flush_wq, + atomic_read(&efx->active_queues) == 0, + msecs_to_jiffies(EFX_MAX_FLUSH_TIME)); + pending = atomic_read(&efx->active_queues); + if (pending) { + netif_err(efx, hw, efx->net_dev, "failed to flush %d queues\n", + pending); + return -ETIMEDOUT; + } + } + + return 0; +} + +static bool efx_ef10_filter_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right) +{ + if ((left->match_flags ^ right->match_flags) | + ((left->flags ^ right->flags) & + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) + return false; + + return memcmp(&left->outer_vid, &right->outer_vid, + sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) == 0; +} + +static unsigned int efx_ef10_filter_hash(const struct efx_filter_spec *spec) +{ + BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); + return jhash2((const u32 *)&spec->outer_vid, + (sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) / 4, + 0); + /* XXX should we randomise the initval? */ +} + +/* Decide whether a filter should be exclusive or else should allow + * delivery to additional recipients. Currently we decide that + * filters for specific local unicast MAC and IP addresses are + * exclusive. + */ +static bool efx_ef10_filter_is_exclusive(const struct efx_filter_spec *spec) +{ + if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC && + !is_multicast_ether_addr(spec->loc_mac)) + return true; + + if ((spec->match_flags & + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { + if (spec->ether_type == htons(ETH_P_IP) && + !ipv4_is_multicast(spec->loc_host[0])) + return true; + if (spec->ether_type == htons(ETH_P_IPV6) && + ((const u8 *)spec->loc_host)[0] != 0xff) + return true; + } + + return false; +} + +static struct efx_filter_spec * +efx_ef10_filter_entry_spec(const struct efx_ef10_filter_table *table, + unsigned int filter_idx) +{ + return (struct efx_filter_spec *)(table->entry[filter_idx].spec & + ~EFX_EF10_FILTER_FLAGS); +} + +static unsigned int +efx_ef10_filter_entry_flags(const struct efx_ef10_filter_table *table, + unsigned int filter_idx) +{ + return table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAGS; +} + +static void +efx_ef10_filter_set_entry(struct efx_ef10_filter_table *table, + unsigned int filter_idx, + const struct efx_filter_spec *spec, + unsigned int flags) +{ + table->entry[filter_idx].spec = (unsigned long)spec | flags; +} + +static void efx_ef10_filter_push_prep(struct efx_nic *efx, + const struct efx_filter_spec *spec, + efx_dword_t *inbuf, u64 handle, + bool replacing) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + memset(inbuf, 0, MC_CMD_FILTER_OP_IN_LEN); + + if (replacing) { + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + MC_CMD_FILTER_OP_IN_OP_REPLACE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, handle); + } else { + u32 match_fields = 0; + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + efx_ef10_filter_is_exclusive(spec) ? + MC_CMD_FILTER_OP_IN_OP_INSERT : + MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE); + + /* Convert match flags and values. Unlike almost + * everything else in MCDI, these fields are in + * network byte order. + */ + if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) + match_fields |= + is_multicast_ether_addr(spec->loc_mac) ? + 1 << MC_CMD_FILTER_OP_IN_MATCH_UNKNOWN_MCAST_DST_LBN : + 1 << MC_CMD_FILTER_OP_IN_MATCH_UNKNOWN_UCAST_DST_LBN; +#define COPY_FIELD(gen_flag, gen_field, mcdi_field) \ + if (spec->match_flags & EFX_FILTER_MATCH_ ## gen_flag) { \ + match_fields |= \ + 1 << MC_CMD_FILTER_OP_IN_MATCH_ ## \ + mcdi_field ## _LBN; \ + BUILD_BUG_ON( \ + MC_CMD_FILTER_OP_IN_ ## mcdi_field ## _LEN < \ + sizeof(spec->gen_field)); \ + memcpy(MCDI_PTR(inbuf, FILTER_OP_IN_ ## mcdi_field), \ + &spec->gen_field, sizeof(spec->gen_field)); \ + } + COPY_FIELD(REM_HOST, rem_host, SRC_IP); + COPY_FIELD(LOC_HOST, loc_host, DST_IP); + COPY_FIELD(REM_MAC, rem_mac, SRC_MAC); + COPY_FIELD(REM_PORT, rem_port, SRC_PORT); + COPY_FIELD(LOC_MAC, loc_mac, DST_MAC); + COPY_FIELD(LOC_PORT, loc_port, DST_PORT); + COPY_FIELD(ETHER_TYPE, ether_type, ETHER_TYPE); + COPY_FIELD(INNER_VID, inner_vid, INNER_VLAN); + COPY_FIELD(OUTER_VID, outer_vid, OUTER_VLAN); + COPY_FIELD(IP_PROTO, ip_proto, IP_PROTO); +#undef COPY_FIELD + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_MATCH_FIELDS, + match_fields); + } + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST, + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? + MC_CMD_FILTER_OP_IN_RX_DEST_DROP : + MC_CMD_FILTER_OP_IN_RX_DEST_HOST); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST, + MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE, spec->dmaq_id); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE, + (spec->flags & EFX_FILTER_FLAG_RX_RSS) ? + MC_CMD_FILTER_OP_IN_RX_MODE_RSS : + MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE); + if (spec->flags & EFX_FILTER_FLAG_RX_RSS) + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, + spec->rss_context != + EFX_FILTER_RSS_CONTEXT_DEFAULT ? + spec->rss_context : nic_data->rx_rss_context); +} + +static int efx_ef10_filter_push(struct efx_nic *efx, + const struct efx_filter_spec *spec, + u64 *handle, bool replacing) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_OUT_LEN); + int rc; + + efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, replacing); + rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), NULL); + if (rc == 0) + *handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE); + return rc; +} + +static int efx_ef10_filter_rx_match_pri(struct efx_ef10_filter_table *table, + enum efx_filter_match_flags match_flags) +{ + unsigned int match_pri; + + for (match_pri = 0; + match_pri < table->rx_match_count; + match_pri++) + if (table->rx_match_flags[match_pri] == match_flags) + return match_pri; + + return -EPROTONOSUPPORT; +} + +static s32 efx_ef10_filter_insert(struct efx_nic *efx, + struct efx_filter_spec *spec, + bool replace_equal) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); + struct efx_filter_spec *saved_spec; + unsigned int match_pri, hash; + unsigned int priv_flags; + bool replacing = false; + int ins_index = -1; + DEFINE_WAIT(wait); + bool is_mc_recip; + s32 rc; + + /* For now, only support RX filters */ + if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) != + EFX_FILTER_FLAG_RX) + return -EINVAL; + + rc = efx_ef10_filter_rx_match_pri(table, spec->match_flags); + if (rc < 0) + return rc; + match_pri = rc; + + hash = efx_ef10_filter_hash(spec); + is_mc_recip = efx_filter_is_mc_recipient(spec); + if (is_mc_recip) + bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); + + /* Find any existing filters with the same match tuple or + * else a free slot to insert at. If any of them are busy, + * we have to wait and retry. + */ + for (;;) { + unsigned int depth = 1; + unsigned int i; + + spin_lock_bh(&efx->filter_lock); + + for (;;) { + i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); + saved_spec = efx_ef10_filter_entry_spec(table, i); + + if (!saved_spec) { + if (ins_index < 0) + ins_index = i; + } else if (efx_ef10_filter_equal(spec, saved_spec)) { + if (table->entry[i].spec & + EFX_EF10_FILTER_FLAG_BUSY) + break; + if (spec->priority < saved_spec->priority && + !(saved_spec->priority == + EFX_FILTER_PRI_REQUIRED && + saved_spec->flags & + EFX_FILTER_FLAG_RX_STACK)) { + rc = -EPERM; + goto out_unlock; + } + if (!is_mc_recip) { + /* This is the only one */ + if (spec->priority == + saved_spec->priority && + !replace_equal) { + rc = -EEXIST; + goto out_unlock; + } + ins_index = i; + goto found; + } else if (spec->priority > + saved_spec->priority || + (spec->priority == + saved_spec->priority && + replace_equal)) { + if (ins_index < 0) + ins_index = i; + else + __set_bit(depth, mc_rem_map); + } + } + + /* Once we reach the maximum search depth, use + * the first suitable slot or return -EBUSY if + * there was none + */ + if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) { + if (ins_index < 0) { + rc = -EBUSY; + goto out_unlock; + } + goto found; + } + + ++depth; + } + + prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_bh(&efx->filter_lock); + schedule(); + } + +found: + /* Create a software table entry if necessary, and mark it + * busy. We might yet fail to insert, but any attempt to + * insert a conflicting filter while we're waiting for the + * firmware must find the busy entry. + */ + saved_spec = efx_ef10_filter_entry_spec(table, ins_index); + if (saved_spec) { + if (spec->flags & EFX_FILTER_FLAG_RX_STACK) { + /* Just make sure it won't be removed */ + saved_spec->flags |= EFX_FILTER_FLAG_RX_STACK; + table->entry[ins_index].spec &= + ~EFX_EF10_FILTER_FLAG_STACK_OLD; + rc = ins_index; + goto out_unlock; + } + replacing = true; + priv_flags = efx_ef10_filter_entry_flags(table, ins_index); + } else { + saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC); + if (!saved_spec) { + rc = -ENOMEM; + goto out_unlock; + } + *saved_spec = *spec; + priv_flags = 0; + } + efx_ef10_filter_set_entry(table, ins_index, saved_spec, + priv_flags | EFX_EF10_FILTER_FLAG_BUSY); + + /* Mark lower-priority multicast recipients busy prior to removal */ + if (is_mc_recip) { + unsigned int depth, i; + + for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { + i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); + if (test_bit(depth, mc_rem_map)) + table->entry[i].spec |= + EFX_EF10_FILTER_FLAG_BUSY; + } + } + + spin_unlock_bh(&efx->filter_lock); + + rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle, + replacing); + + /* Finalise the software table entry */ + spin_lock_bh(&efx->filter_lock); + if (rc == 0) { + if (replacing) { + /* Update the fields that may differ */ + saved_spec->priority = spec->priority; + saved_spec->flags &= EFX_FILTER_FLAG_RX_STACK; + saved_spec->flags |= spec->flags; + saved_spec->rss_context = spec->rss_context; + saved_spec->dmaq_id = spec->dmaq_id; + } + } else if (!replacing) { + kfree(saved_spec); + saved_spec = NULL; + } + efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags); + + /* Remove and finalise entries for lower-priority multicast + * recipients + */ + if (is_mc_recip) { + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + unsigned int depth, i; + + memset(inbuf, 0, sizeof(inbuf)); + + for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { + if (!test_bit(depth, mc_rem_map)) + continue; + + i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); + saved_spec = efx_ef10_filter_entry_spec(table, i); + priv_flags = efx_ef10_filter_entry_flags(table, i); + + if (rc == 0) { + spin_unlock_bh(&efx->filter_lock); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[i].handle); + rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, + inbuf, sizeof(inbuf), + NULL, 0, NULL); + spin_lock_bh(&efx->filter_lock); + } + + if (rc == 0) { + kfree(saved_spec); + saved_spec = NULL; + priv_flags = 0; + } else { + priv_flags &= ~EFX_EF10_FILTER_FLAG_BUSY; + } + efx_ef10_filter_set_entry(table, i, saved_spec, + priv_flags); + } + } + + /* If successful, return the inserted filter ID */ + if (rc == 0) + rc = match_pri * HUNT_FILTER_TBL_ROWS + ins_index; + + wake_up_all(&table->waitq); +out_unlock: + spin_unlock_bh(&efx->filter_lock); + finish_wait(&table->waitq, &wait); + return rc; +} + +void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx) +{ + /* no need to do anything here on EF10 */ +} + +/* Remove a filter. + * If !stack_requested, remove by ID + * If stack_requested, remove by index + * Filter ID may come from userland and must be range-checked. + */ +static int efx_ef10_filter_remove_internal(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id, bool stack_requested) +{ + unsigned int filter_idx = filter_id % HUNT_FILTER_TBL_ROWS; + struct efx_ef10_filter_table *table = efx->filter_state; + MCDI_DECLARE_BUF(inbuf, + MC_CMD_FILTER_OP_IN_HANDLE_OFST + + MC_CMD_FILTER_OP_IN_HANDLE_LEN); + struct efx_filter_spec *spec; + DEFINE_WAIT(wait); + int rc; + + /* Find the software table entry and mark it busy. Don't + * remove it yet; any attempt to update while we're waiting + * for the firmware must find the busy entry. + */ + for (;;) { + spin_lock_bh(&efx->filter_lock); + if (!(table->entry[filter_idx].spec & + EFX_EF10_FILTER_FLAG_BUSY)) + break; + prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_bh(&efx->filter_lock); + schedule(); + } + spec = efx_ef10_filter_entry_spec(table, filter_idx); + if (!spec || spec->priority > priority || + (!stack_requested && + efx_ef10_filter_rx_match_pri(table, spec->match_flags) != + filter_id / HUNT_FILTER_TBL_ROWS)) { + rc = -ENOENT; + goto out_unlock; + } + table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY; + spin_unlock_bh(&efx->filter_lock); + + if (spec->flags & EFX_FILTER_FLAG_RX_STACK && !stack_requested) { + /* Reset steering of a stack-owned filter */ + + struct efx_filter_spec new_spec = *spec; + + new_spec.priority = EFX_FILTER_PRI_REQUIRED; + new_spec.flags = (EFX_FILTER_FLAG_RX | + EFX_FILTER_FLAG_RX_RSS | + EFX_FILTER_FLAG_RX_STACK); + new_spec.dmaq_id = 0; + new_spec.rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT; + rc = efx_ef10_filter_push(efx, &new_spec, + &table->entry[filter_idx].handle, + true); + + spin_lock_bh(&efx->filter_lock); + if (rc == 0) + *spec = new_spec; + } else { + /* Really remove the filter */ + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + efx_ef10_filter_is_exclusive(spec) ? + MC_CMD_FILTER_OP_IN_OP_REMOVE : + MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[filter_idx].handle); + rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, + inbuf, sizeof(inbuf), NULL, 0, NULL); + + spin_lock_bh(&efx->filter_lock); + if (rc == 0) { + kfree(spec); + efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); + } + } + table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY; + wake_up_all(&table->waitq); +out_unlock: + spin_unlock_bh(&efx->filter_lock); + finish_wait(&table->waitq, &wait); + return rc; +} + +static int efx_ef10_filter_remove_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id) +{ + return efx_ef10_filter_remove_internal(efx, priority, filter_id, false); +} + +static int efx_ef10_filter_get_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id, struct efx_filter_spec *spec) +{ + unsigned int filter_idx = filter_id % HUNT_FILTER_TBL_ROWS; + struct efx_ef10_filter_table *table = efx->filter_state; + const struct efx_filter_spec *saved_spec; + int rc; + + spin_lock_bh(&efx->filter_lock); + saved_spec = efx_ef10_filter_entry_spec(table, filter_idx); + if (saved_spec && saved_spec->priority == priority && + efx_ef10_filter_rx_match_pri(table, saved_spec->match_flags) == + filter_id / HUNT_FILTER_TBL_ROWS) { + *spec = *saved_spec; + rc = 0; + } else { + rc = -ENOENT; + } + spin_unlock_bh(&efx->filter_lock); + return rc; +} + +static void efx_ef10_filter_clear_rx(struct efx_nic *efx, + enum efx_filter_priority priority) +{ + /* TODO */ +} + +static u32 efx_ef10_filter_count_rx_used(struct efx_nic *efx, + enum efx_filter_priority priority) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + unsigned int filter_idx; + s32 count = 0; + + spin_lock_bh(&efx->filter_lock); + for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { + if (table->entry[filter_idx].spec && + efx_ef10_filter_entry_spec(table, filter_idx)->priority == + priority) + ++count; + } + spin_unlock_bh(&efx->filter_lock); + return count; +} + +static u32 efx_ef10_filter_get_rx_id_limit(struct efx_nic *efx) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + + return table->rx_match_count * HUNT_FILTER_TBL_ROWS; +} + +static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 *buf, u32 size) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + struct efx_filter_spec *spec; + unsigned int filter_idx; + s32 count = 0; + + spin_lock_bh(&efx->filter_lock); + for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { + spec = efx_ef10_filter_entry_spec(table, filter_idx); + if (spec && spec->priority == priority) { + if (count == size) { + count = -EMSGSIZE; + break; + } + buf[count++] = (efx_ef10_filter_rx_match_pri( + table, spec->match_flags) * + HUNT_FILTER_TBL_ROWS + + filter_idx); + } + } + spin_unlock_bh(&efx->filter_lock); + return count; +} + +#ifdef CONFIG_RFS_ACCEL + +static efx_mcdi_async_completer efx_ef10_filter_rfs_insert_complete; + +static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx, + struct efx_filter_spec *spec) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + struct efx_filter_spec *saved_spec; + unsigned int hash, i, depth = 1; + bool replacing = false; + int ins_index = -1; + u64 cookie; + s32 rc; + + /* Must be an RX filter without RSS and not for a multicast + * destination address (RFS only works for connected sockets). + * These restrictions allow us to pass only a tiny amount of + * data through to the completion function. + */ + EFX_WARN_ON_PARANOID(spec->flags != + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_SCATTER)); + EFX_WARN_ON_PARANOID(spec->priority != EFX_FILTER_PRI_HINT); + EFX_WARN_ON_PARANOID(efx_filter_is_mc_recipient(spec)); + + hash = efx_ef10_filter_hash(spec); + + spin_lock_bh(&efx->filter_lock); + + /* Find any existing filter with the same match tuple or else + * a free slot to insert at. If an existing filter is busy, + * we have to give up. + */ + for (;;) { + i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); + saved_spec = efx_ef10_filter_entry_spec(table, i); + + if (!saved_spec) { + if (ins_index < 0) + ins_index = i; + } else if (efx_ef10_filter_equal(spec, saved_spec)) { + if (table->entry[i].spec & EFX_EF10_FILTER_FLAG_BUSY) { + rc = -EBUSY; + goto fail_unlock; + } + EFX_WARN_ON_PARANOID(saved_spec->flags & + EFX_FILTER_FLAG_RX_STACK); + if (spec->priority < saved_spec->priority) { + rc = -EPERM; + goto fail_unlock; + } + ins_index = i; + break; + } + + /* Once we reach the maximum search depth, use the + * first suitable slot or return -EBUSY if there was + * none + */ + if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) { + if (ins_index < 0) { + rc = -EBUSY; + goto fail_unlock; + } + break; + } + + ++depth; + } + + /* Create a software table entry if necessary, and mark it + * busy. We might yet fail to insert, but any attempt to + * insert a conflicting filter while we're waiting for the + * firmware must find the busy entry. + */ + saved_spec = efx_ef10_filter_entry_spec(table, ins_index); + if (saved_spec) { + replacing = true; + } else { + saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC); + if (!saved_spec) { + rc = -ENOMEM; + goto fail_unlock; + } + *saved_spec = *spec; + } + efx_ef10_filter_set_entry(table, ins_index, saved_spec, + EFX_EF10_FILTER_FLAG_BUSY); + + spin_unlock_bh(&efx->filter_lock); + + /* Pack up the variables needed on completion */ + cookie = replacing << 31 | ins_index << 16 | spec->dmaq_id; + + efx_ef10_filter_push_prep(efx, spec, inbuf, + table->entry[ins_index].handle, replacing); + efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), + MC_CMD_FILTER_OP_OUT_LEN, + efx_ef10_filter_rfs_insert_complete, cookie); + + return ins_index; + +fail_unlock: + spin_unlock_bh(&efx->filter_lock); + return rc; +} + +static void +efx_ef10_filter_rfs_insert_complete(struct efx_nic *efx, unsigned long cookie, + int rc, efx_dword_t *outbuf, + size_t outlen_actual) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + unsigned int ins_index, dmaq_id; + struct efx_filter_spec *spec; + bool replacing; + + /* Unpack the cookie */ + replacing = cookie >> 31; + ins_index = (cookie >> 16) & (HUNT_FILTER_TBL_ROWS - 1); + dmaq_id = cookie & 0xffff; + + spin_lock_bh(&efx->filter_lock); + spec = efx_ef10_filter_entry_spec(table, ins_index); + if (rc == 0) { + table->entry[ins_index].handle = + MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE); + if (replacing) + spec->dmaq_id = dmaq_id; + } else if (!replacing) { + kfree(spec); + spec = NULL; + } + efx_ef10_filter_set_entry(table, ins_index, spec, 0); + spin_unlock_bh(&efx->filter_lock); + + wake_up_all(&table->waitq); +} + +static void +efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx, + unsigned long filter_idx, + int rc, efx_dword_t *outbuf, + size_t outlen_actual); + +static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, + unsigned int filter_idx) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + struct efx_filter_spec *spec = + efx_ef10_filter_entry_spec(table, filter_idx); + MCDI_DECLARE_BUF(inbuf, + MC_CMD_FILTER_OP_IN_HANDLE_OFST + + MC_CMD_FILTER_OP_IN_HANDLE_LEN); + + if (!spec || + (table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAG_BUSY) || + spec->priority != EFX_FILTER_PRI_HINT || + !rps_may_expire_flow(efx->net_dev, spec->dmaq_id, + flow_id, filter_idx)) + return false; + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + MC_CMD_FILTER_OP_IN_OP_REMOVE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[filter_idx].handle); + if (efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), 0, + efx_ef10_filter_rfs_expire_complete, filter_idx)) + return false; + + table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY; + return true; +} + +static void +efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx, + unsigned long filter_idx, + int rc, efx_dword_t *outbuf, + size_t outlen_actual) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + struct efx_filter_spec *spec = + efx_ef10_filter_entry_spec(table, filter_idx); + + spin_lock_bh(&efx->filter_lock); + if (rc == 0) { + kfree(spec); + efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); + } + table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY; + wake_up_all(&table->waitq); + spin_unlock_bh(&efx->filter_lock); +} + +#endif /* CONFIG_RFS_ACCEL */ + +static int efx_ef10_filter_match_flags_from_mcdi(u32 mcdi_flags) +{ + int match_flags = 0; + +#define MAP_FLAG(gen_flag, mcdi_field) { \ + u32 old_mcdi_flags = mcdi_flags; \ + mcdi_flags &= ~(1 << MC_CMD_FILTER_OP_IN_MATCH_ ## \ + mcdi_field ## _LBN); \ + if (mcdi_flags != old_mcdi_flags) \ + match_flags |= EFX_FILTER_MATCH_ ## gen_flag; \ + } + MAP_FLAG(LOC_MAC_IG, UNKNOWN_UCAST_DST); + MAP_FLAG(LOC_MAC_IG, UNKNOWN_MCAST_DST); + MAP_FLAG(REM_HOST, SRC_IP); + MAP_FLAG(LOC_HOST, DST_IP); + MAP_FLAG(REM_MAC, SRC_MAC); + MAP_FLAG(REM_PORT, SRC_PORT); + MAP_FLAG(LOC_MAC, DST_MAC); + MAP_FLAG(LOC_PORT, DST_PORT); + MAP_FLAG(ETHER_TYPE, ETHER_TYPE); + MAP_FLAG(INNER_VID, INNER_VLAN); + MAP_FLAG(OUTER_VID, OUTER_VLAN); + MAP_FLAG(IP_PROTO, IP_PROTO); +#undef MAP_FLAG + + /* Did we map them all? */ + if (mcdi_flags) + return -EINVAL; + + return match_flags; +} + +static int efx_ef10_filter_table_probe(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX); + unsigned int pd_match_pri, pd_match_count; + struct efx_ef10_filter_table *table; + size_t outlen; + int rc; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + /* Find out which RX filter types are supported, and their priorities */ + MCDI_SET_DWORD(inbuf, GET_PARSER_DISP_INFO_IN_OP, + MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PARSER_DISP_INFO, + inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), + &outlen); + if (rc) + goto fail; + pd_match_count = MCDI_VAR_ARRAY_LEN( + outlen, GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES); + table->rx_match_count = 0; + + for (pd_match_pri = 0; pd_match_pri < pd_match_count; pd_match_pri++) { + u32 mcdi_flags = + MCDI_ARRAY_DWORD( + outbuf, + GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES, + pd_match_pri); + rc = efx_ef10_filter_match_flags_from_mcdi(mcdi_flags); + if (rc < 0) { + netif_dbg(efx, probe, efx->net_dev, + "%s: fw flags %#x pri %u not supported in driver\n", + __func__, mcdi_flags, pd_match_pri); + } else { + netif_dbg(efx, probe, efx->net_dev, + "%s: fw flags %#x pri %u supported as driver flags %#x pri %u\n", + __func__, mcdi_flags, pd_match_pri, + rc, table->rx_match_count); + table->rx_match_flags[table->rx_match_count++] = rc; + } + } + + table->entry = vzalloc(HUNT_FILTER_TBL_ROWS * sizeof(*table->entry)); + if (!table->entry) { + rc = -ENOMEM; + goto fail; + } + + efx->filter_state = table; + init_waitqueue_head(&table->waitq); + return 0; + +fail: + kfree(table); + return rc; +} + +static void efx_ef10_filter_table_restore(struct efx_nic *efx) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_filter_spec *spec; + unsigned int filter_idx; + bool failed = false; + int rc; + + if (!nic_data->must_restore_filters) + return; + + spin_lock_bh(&efx->filter_lock); + + for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { + spec = efx_ef10_filter_entry_spec(table, filter_idx); + if (!spec) + continue; + + table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY; + spin_unlock_bh(&efx->filter_lock); + + rc = efx_ef10_filter_push(efx, spec, + &table->entry[filter_idx].handle, + false); + if (rc) + failed = true; + + spin_lock_bh(&efx->filter_lock); + if (rc) { + kfree(spec); + efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); + } else { + table->entry[filter_idx].spec &= + ~EFX_EF10_FILTER_FLAG_BUSY; + } + } + + spin_unlock_bh(&efx->filter_lock); + + if (failed) + netif_err(efx, hw, efx->net_dev, + "unable to restore all filters\n"); + else + nic_data->must_restore_filters = false; +} + +static void efx_ef10_filter_table_remove(struct efx_nic *efx) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + struct efx_filter_spec *spec; + unsigned int filter_idx; + int rc; + + for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { + spec = efx_ef10_filter_entry_spec(table, filter_idx); + if (!spec) + continue; + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + efx_ef10_filter_is_exclusive(spec) ? + MC_CMD_FILTER_OP_IN_OP_REMOVE : + MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[filter_idx].handle); + rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), + NULL, 0, NULL); + + WARN_ON(rc != 0); + kfree(spec); + } + + vfree(table->entry); + kfree(table); +} + +static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) +{ + struct efx_ef10_filter_table *table = efx->filter_state; + struct net_device *net_dev = efx->net_dev; + struct efx_filter_spec spec; + bool remove_failed = false; + struct netdev_hw_addr *uc; + struct netdev_hw_addr *mc; + unsigned int filter_idx; + int i, n, rc; + + if (!efx_dev_registered(efx)) + return; + + /* Mark old filters that may need to be removed */ + spin_lock_bh(&efx->filter_lock); + n = table->stack_uc_count < 0 ? 1 : table->stack_uc_count; + for (i = 0; i < n; i++) { + filter_idx = table->stack_uc_list[i].id % HUNT_FILTER_TBL_ROWS; + table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_STACK_OLD; + } + n = table->stack_mc_count < 0 ? 1 : table->stack_mc_count; + for (i = 0; i < n; i++) { + filter_idx = table->stack_mc_list[i].id % HUNT_FILTER_TBL_ROWS; + table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_STACK_OLD; + } + spin_unlock_bh(&efx->filter_lock); + + /* Copy/convert the address lists; add the primary station + * address and broadcast address + */ + netif_addr_lock_bh(net_dev); + if (net_dev->flags & IFF_PROMISC || + netdev_uc_count(net_dev) >= EFX_EF10_FILTER_STACK_UC_MAX) { + table->stack_uc_count = -1; + } else { + table->stack_uc_count = 1 + netdev_uc_count(net_dev); + memcpy(table->stack_uc_list[0].addr, net_dev->dev_addr, + ETH_ALEN); + i = 1; + netdev_for_each_uc_addr(uc, net_dev) { + memcpy(table->stack_uc_list[i].addr, + uc->addr, ETH_ALEN); + i++; + } + } + if (net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI) || + netdev_mc_count(net_dev) >= EFX_EF10_FILTER_STACK_MC_MAX) { + table->stack_mc_count = -1; + } else { + table->stack_mc_count = 1 + netdev_mc_count(net_dev); + eth_broadcast_addr(table->stack_mc_list[0].addr); + i = 1; + netdev_for_each_mc_addr(mc, net_dev) { + memcpy(table->stack_mc_list[i].addr, + mc->addr, ETH_ALEN); + i++; + } + } + netif_addr_unlock_bh(net_dev); + + /* Insert/renew unicast filters */ + if (table->stack_uc_count >= 0) { + for (i = 0; i < table->stack_uc_count; i++) { + efx_filter_init_rx(&spec, EFX_FILTER_PRI_REQUIRED, + EFX_FILTER_FLAG_RX_RSS | + EFX_FILTER_FLAG_RX_STACK, + 0); + efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, + table->stack_uc_list[i].addr); + rc = efx_ef10_filter_insert(efx, &spec, true); + if (rc < 0) { + /* Fall back to unicast-promisc */ + while (i--) + efx_ef10_filter_remove_safe( + efx, EFX_FILTER_PRI_REQUIRED, + table->stack_uc_list[i].id); + table->stack_uc_count = -1; + break; + } + table->stack_uc_list[i].id = rc; + } + } + if (table->stack_uc_count < 0) { + efx_filter_init_rx(&spec, EFX_FILTER_PRI_REQUIRED, + EFX_FILTER_FLAG_RX_RSS | + EFX_FILTER_FLAG_RX_STACK, + 0); + efx_filter_set_uc_def(&spec); + rc = efx_ef10_filter_insert(efx, &spec, true); + if (rc < 0) { + WARN_ON(1); + table->stack_uc_count = 0; + } else { + table->stack_uc_list[0].id = rc; + } + } + + /* Insert/renew multicast filters */ + if (table->stack_mc_count >= 0) { + for (i = 0; i < table->stack_mc_count; i++) { + efx_filter_init_rx(&spec, EFX_FILTER_PRI_REQUIRED, + EFX_FILTER_FLAG_RX_RSS | + EFX_FILTER_FLAG_RX_STACK, + 0); + efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, + table->stack_mc_list[i].addr); + rc = efx_ef10_filter_insert(efx, &spec, true); + if (rc < 0) { + /* Fall back to multicast-promisc */ + while (i--) + efx_ef10_filter_remove_safe( + efx, EFX_FILTER_PRI_REQUIRED, + table->stack_mc_list[i].id); + table->stack_mc_count = -1; + break; + } + table->stack_mc_list[i].id = rc; + } + } + if (table->stack_mc_count < 0) { + efx_filter_init_rx(&spec, EFX_FILTER_PRI_REQUIRED, + EFX_FILTER_FLAG_RX_RSS | + EFX_FILTER_FLAG_RX_STACK, + 0); + efx_filter_set_mc_def(&spec); + rc = efx_ef10_filter_insert(efx, &spec, true); + if (rc < 0) { + WARN_ON(1); + table->stack_mc_count = 0; + } else { + table->stack_mc_list[0].id = rc; + } + } + + /* Remove filters that weren't renewed. Since nothing else + * changes the STACK_OLD flag or removes these filters, we + * don't need to hold the filter_lock while scanning for + * these filters. + */ + for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) { + if (ACCESS_ONCE(table->entry[i].spec) & + EFX_EF10_FILTER_FLAG_STACK_OLD) { + if (efx_ef10_filter_remove_internal(efx, + EFX_FILTER_PRI_REQUIRED, + i, true) < 0) + remove_failed = true; + } + } + WARN_ON(remove_failed); +} + +static int efx_ef10_mac_reconfigure(struct efx_nic *efx) +{ + efx_ef10_filter_sync_rx_mode(efx); + + return efx_mcdi_set_mac(efx); +} + +#ifdef CONFIG_SFC_MTD + +struct efx_ef10_nvram_type_info { + u16 type, type_mask; + u8 port; + const char *name; +}; + +static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = { + { NVRAM_PARTITION_TYPE_MC_FIRMWARE, 0, 0, "sfc_mcfw" }, + { NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP, 0, 0, "sfc_mcfw_backup" }, + { NVRAM_PARTITION_TYPE_EXPANSION_ROM, 0, 0, "sfc_exp_rom" }, + { NVRAM_PARTITION_TYPE_STATIC_CONFIG, 0, 0, "sfc_static_cfg" }, + { NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG, 0, 0, "sfc_dynamic_cfg" }, + { NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0, 0, 0, "sfc_exp_rom_cfg" }, + { NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1, 0, 1, "sfc_exp_rom_cfg" }, + { NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2, 0, 2, "sfc_exp_rom_cfg" }, + { NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3, 0, 3, "sfc_exp_rom_cfg" }, + { NVRAM_PARTITION_TYPE_PHY_MIN, 0xff, 0, "sfc_phy_fw" }, +}; + +static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, + struct efx_mcdi_mtd_partition *part, + unsigned int type) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_METADATA_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_METADATA_OUT_LENMAX); + const struct efx_ef10_nvram_type_info *info; + size_t size, erase_size, outlen; + bool protected; + int rc; + + for (info = efx_ef10_nvram_types; ; info++) { + if (info == + efx_ef10_nvram_types + ARRAY_SIZE(efx_ef10_nvram_types)) + return -ENODEV; + if ((type & ~info->type_mask) == info->type) + break; + } + if (info->port != efx_port_num(efx)) + return -ENODEV; + + rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected); + if (rc) + return rc; + if (protected) + return -ENODEV; /* hide it */ + + part->nvram_type = type; + + MCDI_SET_DWORD(inbuf, NVRAM_METADATA_IN_TYPE, type); + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_METADATA, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_NVRAM_METADATA_OUT_LENMIN) + return -EIO; + if (MCDI_DWORD(outbuf, NVRAM_METADATA_OUT_FLAGS) & + (1 << MC_CMD_NVRAM_METADATA_OUT_SUBTYPE_VALID_LBN)) + part->fw_subtype = MCDI_DWORD(outbuf, + NVRAM_METADATA_OUT_SUBTYPE); + + part->common.dev_type_name = "EF10 NVRAM manager"; + part->common.type_name = info->name; + + part->common.mtd.type = MTD_NORFLASH; + part->common.mtd.flags = MTD_CAP_NORFLASH; + part->common.mtd.size = size; + part->common.mtd.erasesize = erase_size; + + return 0; +} + +static int efx_ef10_mtd_probe(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX); + struct efx_mcdi_mtd_partition *parts; + size_t outlen, n_parts_total, i, n_parts; + unsigned int type; + int rc; + + ASSERT_RTNL(); + + BUILD_BUG_ON(MC_CMD_NVRAM_PARTITIONS_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_PARTITIONS, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_NVRAM_PARTITIONS_OUT_LENMIN) + return -EIO; + + n_parts_total = MCDI_DWORD(outbuf, NVRAM_PARTITIONS_OUT_NUM_PARTITIONS); + if (n_parts_total > + MCDI_VAR_ARRAY_LEN(outlen, NVRAM_PARTITIONS_OUT_TYPE_ID)) + return -EIO; + + parts = kcalloc(n_parts_total, sizeof(*parts), GFP_KERNEL); + if (!parts) + return -ENOMEM; + + n_parts = 0; + for (i = 0; i < n_parts_total; i++) { + type = MCDI_ARRAY_DWORD(outbuf, NVRAM_PARTITIONS_OUT_TYPE_ID, + i); + rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type); + if (rc == 0) + n_parts++; + else if (rc != -ENODEV) + goto fail; + } + + rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts)); +fail: + if (rc) + kfree(parts); + return rc; +} + +#endif /* CONFIG_SFC_MTD */ + +static void efx_ef10_ptp_write_host_time(struct efx_nic *efx, u32 host_time) +{ + _efx_writed(efx, cpu_to_le32(host_time), ER_DZ_MC_DB_LWRD); +} + +const struct efx_nic_type efx_hunt_a0_nic_type = { + .mem_map_size = efx_ef10_mem_map_size, + .probe = efx_ef10_probe, + .remove = efx_ef10_remove, + .dimension_resources = efx_ef10_dimension_resources, + .init = efx_ef10_init_nic, + .fini = efx_port_dummy_op_void, + .map_reset_reason = efx_mcdi_map_reset_reason, + .map_reset_flags = efx_ef10_map_reset_flags, + .reset = efx_mcdi_reset, + .probe_port = efx_mcdi_port_probe, + .remove_port = efx_mcdi_port_remove, + .fini_dmaq = efx_ef10_fini_dmaq, + .describe_stats = efx_ef10_describe_stats, + .update_stats = efx_ef10_update_stats, + .start_stats = efx_mcdi_mac_start_stats, + .stop_stats = efx_mcdi_mac_stop_stats, + .set_id_led = efx_mcdi_set_id_led, + .push_irq_moderation = efx_ef10_push_irq_moderation, + .reconfigure_mac = efx_ef10_mac_reconfigure, + .check_mac_fault = efx_mcdi_mac_check_fault, + .reconfigure_port = efx_mcdi_port_reconfigure, + .get_wol = efx_ef10_get_wol, + .set_wol = efx_ef10_set_wol, + .resume_wol = efx_port_dummy_op_void, + /* TODO: test_chip */ + .test_nvram = efx_mcdi_nvram_test_all, + .mcdi_request = efx_ef10_mcdi_request, + .mcdi_poll_response = efx_ef10_mcdi_poll_response, + .mcdi_read_response = efx_ef10_mcdi_read_response, + .mcdi_poll_reboot = efx_ef10_mcdi_poll_reboot, + .irq_enable_master = efx_port_dummy_op_void, + .irq_test_generate = efx_ef10_irq_test_generate, + .irq_disable_non_ev = efx_port_dummy_op_void, + .irq_handle_msi = efx_ef10_msi_interrupt, + .irq_handle_legacy = efx_ef10_legacy_interrupt, + .tx_probe = efx_ef10_tx_probe, + .tx_init = efx_ef10_tx_init, + .tx_remove = efx_ef10_tx_remove, + .tx_write = efx_ef10_tx_write, + .rx_push_indir_table = efx_ef10_rx_push_indir_table, + .rx_probe = efx_ef10_rx_probe, + .rx_init = efx_ef10_rx_init, + .rx_remove = efx_ef10_rx_remove, + .rx_write = efx_ef10_rx_write, + .rx_defer_refill = efx_ef10_rx_defer_refill, + .ev_probe = efx_ef10_ev_probe, + .ev_init = efx_ef10_ev_init, + .ev_fini = efx_ef10_ev_fini, + .ev_remove = efx_ef10_ev_remove, + .ev_process = efx_ef10_ev_process, + .ev_read_ack = efx_ef10_ev_read_ack, + .ev_test_generate = efx_ef10_ev_test_generate, + .filter_table_probe = efx_ef10_filter_table_probe, + .filter_table_restore = efx_ef10_filter_table_restore, + .filter_table_remove = efx_ef10_filter_table_remove, + .filter_update_rx_scatter = efx_ef10_filter_update_rx_scatter, + .filter_insert = efx_ef10_filter_insert, + .filter_remove_safe = efx_ef10_filter_remove_safe, + .filter_get_safe = efx_ef10_filter_get_safe, + .filter_clear_rx = efx_ef10_filter_clear_rx, + .filter_count_rx_used = efx_ef10_filter_count_rx_used, + .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit, + .filter_get_rx_ids = efx_ef10_filter_get_rx_ids, +#ifdef CONFIG_RFS_ACCEL + .filter_rfs_insert = efx_ef10_filter_rfs_insert, + .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one, +#endif +#ifdef CONFIG_SFC_MTD + .mtd_probe = efx_ef10_mtd_probe, + .mtd_rename = efx_mcdi_mtd_rename, + .mtd_read = efx_mcdi_mtd_read, + .mtd_erase = efx_mcdi_mtd_erase, + .mtd_write = efx_mcdi_mtd_write, + .mtd_sync = efx_mcdi_mtd_sync, +#endif + .ptp_write_host_time = efx_ef10_ptp_write_host_time, + + .revision = EFX_REV_HUNT_A0, + .max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH), + .rx_prefix_size = ES_DZ_RX_PREFIX_SIZE, + .rx_hash_offset = ES_DZ_RX_PREFIX_HASH_OFST, + .can_rx_scatter = true, + .always_rx_scatter = true, + .max_interrupt_mode = EFX_INT_MODE_MSIX, + .timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH, + .offload_features = (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXHASH | NETIF_F_NTUPLE), + .mcdi_max_ver = 2, + .max_rx_ip_filters = HUNT_FILTER_TBL_ROWS, +}; diff --git a/drivers/net/ethernet/sfc/ef10_regs.h b/drivers/net/ethernet/sfc/ef10_regs.h new file mode 100644 index 000000000000..b3f4e3755fd9 --- /dev/null +++ b/drivers/net/ethernet/sfc/ef10_regs.h @@ -0,0 +1,415 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2012-2013 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_EF10_REGS_H +#define EFX_EF10_REGS_H + +/* EF10 hardware architecture definitions have a name prefix following + * the format: + * + * E<type>_<min-rev><max-rev>_ + * + * The following <type> strings are used: + * + * MMIO register Host memory structure + * ------------------------------------------------------------- + * Address R + * Bitfield RF SF + * Enumerator FE SE + * + * <min-rev> is the first revision to which the definition applies: + * + * D: Huntington A0 + * + * If the definition has been changed or removed in later revisions + * then <max-rev> is the last revision to which the definition applies; + * otherwise it is "Z". + */ + +/************************************************************************** + * + * EF10 registers and descriptors + * + ************************************************************************** + */ + +/* BIU_HW_REV_ID_REG: */ +#define ER_DZ_BIU_HW_REV_ID 0x00000000 +#define ERF_DZ_HW_REV_ID_LBN 0 +#define ERF_DZ_HW_REV_ID_WIDTH 32 + +/* BIU_MC_SFT_STATUS_REG: */ +#define ER_DZ_BIU_MC_SFT_STATUS 0x00000010 +#define ER_DZ_BIU_MC_SFT_STATUS_STEP 4 +#define ER_DZ_BIU_MC_SFT_STATUS_ROWS 8 +#define ERF_DZ_MC_SFT_STATUS_LBN 0 +#define ERF_DZ_MC_SFT_STATUS_WIDTH 32 + +/* BIU_INT_ISR_REG: */ +#define ER_DZ_BIU_INT_ISR 0x00000090 +#define ERF_DZ_ISR_REG_LBN 0 +#define ERF_DZ_ISR_REG_WIDTH 32 + +/* MC_DB_LWRD_REG: */ +#define ER_DZ_MC_DB_LWRD 0x00000200 +#define ERF_DZ_MC_DOORBELL_L_LBN 0 +#define ERF_DZ_MC_DOORBELL_L_WIDTH 32 + +/* MC_DB_HWRD_REG: */ +#define ER_DZ_MC_DB_HWRD 0x00000204 +#define ERF_DZ_MC_DOORBELL_H_LBN 0 +#define ERF_DZ_MC_DOORBELL_H_WIDTH 32 + +/* EVQ_RPTR_REG: */ +#define ER_DZ_EVQ_RPTR 0x00000400 +#define ER_DZ_EVQ_RPTR_STEP 8192 +#define ER_DZ_EVQ_RPTR_ROWS 2048 +#define ERF_DZ_EVQ_RPTR_VLD_LBN 15 +#define ERF_DZ_EVQ_RPTR_VLD_WIDTH 1 +#define ERF_DZ_EVQ_RPTR_LBN 0 +#define ERF_DZ_EVQ_RPTR_WIDTH 15 + +/* EVQ_TMR_REG: */ +#define ER_DZ_EVQ_TMR 0x00000420 +#define ER_DZ_EVQ_TMR_STEP 8192 +#define ER_DZ_EVQ_TMR_ROWS 2048 +#define ERF_DZ_TC_TIMER_MODE_LBN 14 +#define ERF_DZ_TC_TIMER_MODE_WIDTH 2 +#define ERF_DZ_TC_TIMER_VAL_LBN 0 +#define ERF_DZ_TC_TIMER_VAL_WIDTH 14 + +/* RX_DESC_UPD_REG: */ +#define ER_DZ_RX_DESC_UPD 0x00000830 +#define ER_DZ_RX_DESC_UPD_STEP 8192 +#define ER_DZ_RX_DESC_UPD_ROWS 2048 +#define ERF_DZ_RX_DESC_WPTR_LBN 0 +#define ERF_DZ_RX_DESC_WPTR_WIDTH 12 + +/* TX_DESC_UPD_REG: */ +#define ER_DZ_TX_DESC_UPD 0x00000a10 +#define ER_DZ_TX_DESC_UPD_STEP 8192 +#define ER_DZ_TX_DESC_UPD_ROWS 2048 +#define ERF_DZ_RSVD_LBN 76 +#define ERF_DZ_RSVD_WIDTH 20 +#define ERF_DZ_TX_DESC_WPTR_LBN 64 +#define ERF_DZ_TX_DESC_WPTR_WIDTH 12 +#define ERF_DZ_TX_DESC_HWORD_LBN 32 +#define ERF_DZ_TX_DESC_HWORD_WIDTH 32 +#define ERF_DZ_TX_DESC_LWORD_LBN 0 +#define ERF_DZ_TX_DESC_LWORD_WIDTH 32 + +/* DRIVER_EV */ +#define ESF_DZ_DRV_CODE_LBN 60 +#define ESF_DZ_DRV_CODE_WIDTH 4 +#define ESF_DZ_DRV_SUB_CODE_LBN 56 +#define ESF_DZ_DRV_SUB_CODE_WIDTH 4 +#define ESE_DZ_DRV_TIMER_EV 3 +#define ESE_DZ_DRV_START_UP_EV 2 +#define ESE_DZ_DRV_WAKE_UP_EV 1 +#define ESF_DZ_DRV_SUB_DATA_LBN 0 +#define ESF_DZ_DRV_SUB_DATA_WIDTH 56 +#define ESF_DZ_DRV_EVQ_ID_LBN 0 +#define ESF_DZ_DRV_EVQ_ID_WIDTH 14 +#define ESF_DZ_DRV_TMR_ID_LBN 0 +#define ESF_DZ_DRV_TMR_ID_WIDTH 14 + +/* EVENT_ENTRY */ +#define ESF_DZ_EV_CODE_LBN 60 +#define ESF_DZ_EV_CODE_WIDTH 4 +#define ESE_DZ_EV_CODE_MCDI_EV 12 +#define ESE_DZ_EV_CODE_DRIVER_EV 5 +#define ESE_DZ_EV_CODE_TX_EV 2 +#define ESE_DZ_EV_CODE_RX_EV 0 +#define ESE_DZ_OTHER other +#define ESF_DZ_EV_DATA_LBN 0 +#define ESF_DZ_EV_DATA_WIDTH 60 + +/* MC_EVENT */ +#define ESF_DZ_MC_CODE_LBN 60 +#define ESF_DZ_MC_CODE_WIDTH 4 +#define ESF_DZ_MC_OVERRIDE_HOLDOFF_LBN 59 +#define ESF_DZ_MC_OVERRIDE_HOLDOFF_WIDTH 1 +#define ESF_DZ_MC_DROP_EVENT_LBN 58 +#define ESF_DZ_MC_DROP_EVENT_WIDTH 1 +#define ESF_DZ_MC_SOFT_LBN 0 +#define ESF_DZ_MC_SOFT_WIDTH 58 + +/* RX_EVENT */ +#define ESF_DZ_RX_CODE_LBN 60 +#define ESF_DZ_RX_CODE_WIDTH 4 +#define ESF_DZ_RX_OVERRIDE_HOLDOFF_LBN 59 +#define ESF_DZ_RX_OVERRIDE_HOLDOFF_WIDTH 1 +#define ESF_DZ_RX_DROP_EVENT_LBN 58 +#define ESF_DZ_RX_DROP_EVENT_WIDTH 1 +#define ESF_DZ_RX_EV_RSVD2_LBN 54 +#define ESF_DZ_RX_EV_RSVD2_WIDTH 4 +#define ESF_DZ_RX_EV_SOFT2_LBN 52 +#define ESF_DZ_RX_EV_SOFT2_WIDTH 2 +#define ESF_DZ_RX_DSC_PTR_LBITS_LBN 48 +#define ESF_DZ_RX_DSC_PTR_LBITS_WIDTH 4 +#define ESF_DZ_RX_L4_CLASS_LBN 45 +#define ESF_DZ_RX_L4_CLASS_WIDTH 3 +#define ESE_DZ_L4_CLASS_RSVD7 7 +#define ESE_DZ_L4_CLASS_RSVD6 6 +#define ESE_DZ_L4_CLASS_RSVD5 5 +#define ESE_DZ_L4_CLASS_RSVD4 4 +#define ESE_DZ_L4_CLASS_RSVD3 3 +#define ESE_DZ_L4_CLASS_UDP 2 +#define ESE_DZ_L4_CLASS_TCP 1 +#define ESE_DZ_L4_CLASS_UNKNOWN 0 +#define ESF_DZ_RX_L3_CLASS_LBN 42 +#define ESF_DZ_RX_L3_CLASS_WIDTH 3 +#define ESE_DZ_L3_CLASS_RSVD7 7 +#define ESE_DZ_L3_CLASS_IP6_FRAG 6 +#define ESE_DZ_L3_CLASS_ARP 5 +#define ESE_DZ_L3_CLASS_IP4_FRAG 4 +#define ESE_DZ_L3_CLASS_FCOE 3 +#define ESE_DZ_L3_CLASS_IP6 2 +#define ESE_DZ_L3_CLASS_IP4 1 +#define ESE_DZ_L3_CLASS_UNKNOWN 0 +#define ESF_DZ_RX_ETH_TAG_CLASS_LBN 39 +#define ESF_DZ_RX_ETH_TAG_CLASS_WIDTH 3 +#define ESE_DZ_ETH_TAG_CLASS_RSVD7 7 +#define ESE_DZ_ETH_TAG_CLASS_RSVD6 6 +#define ESE_DZ_ETH_TAG_CLASS_RSVD5 5 +#define ESE_DZ_ETH_TAG_CLASS_RSVD4 4 +#define ESE_DZ_ETH_TAG_CLASS_RSVD3 3 +#define ESE_DZ_ETH_TAG_CLASS_VLAN2 2 +#define ESE_DZ_ETH_TAG_CLASS_VLAN1 1 +#define ESE_DZ_ETH_TAG_CLASS_NONE 0 +#define ESF_DZ_RX_ETH_BASE_CLASS_LBN 36 +#define ESF_DZ_RX_ETH_BASE_CLASS_WIDTH 3 +#define ESE_DZ_ETH_BASE_CLASS_LLC_SNAP 2 +#define ESE_DZ_ETH_BASE_CLASS_LLC 1 +#define ESE_DZ_ETH_BASE_CLASS_ETH2 0 +#define ESF_DZ_RX_MAC_CLASS_LBN 35 +#define ESF_DZ_RX_MAC_CLASS_WIDTH 1 +#define ESE_DZ_MAC_CLASS_MCAST 1 +#define ESE_DZ_MAC_CLASS_UCAST 0 +#define ESF_DZ_RX_EV_SOFT1_LBN 32 +#define ESF_DZ_RX_EV_SOFT1_WIDTH 3 +#define ESF_DZ_RX_EV_RSVD1_LBN 31 +#define ESF_DZ_RX_EV_RSVD1_WIDTH 1 +#define ESF_DZ_RX_ABORT_LBN 30 +#define ESF_DZ_RX_ABORT_WIDTH 1 +#define ESF_DZ_RX_ECC_ERR_LBN 29 +#define ESF_DZ_RX_ECC_ERR_WIDTH 1 +#define ESF_DZ_RX_CRC1_ERR_LBN 28 +#define ESF_DZ_RX_CRC1_ERR_WIDTH 1 +#define ESF_DZ_RX_CRC0_ERR_LBN 27 +#define ESF_DZ_RX_CRC0_ERR_WIDTH 1 +#define ESF_DZ_RX_TCPUDP_CKSUM_ERR_LBN 26 +#define ESF_DZ_RX_TCPUDP_CKSUM_ERR_WIDTH 1 +#define ESF_DZ_RX_IPCKSUM_ERR_LBN 25 +#define ESF_DZ_RX_IPCKSUM_ERR_WIDTH 1 +#define ESF_DZ_RX_ECRC_ERR_LBN 24 +#define ESF_DZ_RX_ECRC_ERR_WIDTH 1 +#define ESF_DZ_RX_QLABEL_LBN 16 +#define ESF_DZ_RX_QLABEL_WIDTH 5 +#define ESF_DZ_RX_PARSE_INCOMPLETE_LBN 15 +#define ESF_DZ_RX_PARSE_INCOMPLETE_WIDTH 1 +#define ESF_DZ_RX_CONT_LBN 14 +#define ESF_DZ_RX_CONT_WIDTH 1 +#define ESF_DZ_RX_BYTES_LBN 0 +#define ESF_DZ_RX_BYTES_WIDTH 14 + +/* RX_KER_DESC */ +#define ESF_DZ_RX_KER_RESERVED_LBN 62 +#define ESF_DZ_RX_KER_RESERVED_WIDTH 2 +#define ESF_DZ_RX_KER_BYTE_CNT_LBN 48 +#define ESF_DZ_RX_KER_BYTE_CNT_WIDTH 14 +#define ESF_DZ_RX_KER_BUF_ADDR_LBN 0 +#define ESF_DZ_RX_KER_BUF_ADDR_WIDTH 48 + +/* RX_USER_DESC */ +#define ESF_DZ_RX_USR_RESERVED_LBN 62 +#define ESF_DZ_RX_USR_RESERVED_WIDTH 2 +#define ESF_DZ_RX_USR_BYTE_CNT_LBN 48 +#define ESF_DZ_RX_USR_BYTE_CNT_WIDTH 14 +#define ESF_DZ_RX_USR_BUF_PAGE_SIZE_LBN 44 +#define ESF_DZ_RX_USR_BUF_PAGE_SIZE_WIDTH 4 +#define ESE_DZ_USR_BUF_PAGE_SZ_4MB 10 +#define ESE_DZ_USR_BUF_PAGE_SZ_1MB 8 +#define ESE_DZ_USR_BUF_PAGE_SZ_64KB 4 +#define ESE_DZ_USR_BUF_PAGE_SZ_4KB 0 +#define ESF_DZ_RX_USR_BUF_ID_OFFSET_LBN 0 +#define ESF_DZ_RX_USR_BUF_ID_OFFSET_WIDTH 44 +#define ESF_DZ_RX_USR_4KBPS_BUF_ID_LBN 12 +#define ESF_DZ_RX_USR_4KBPS_BUF_ID_WIDTH 32 +#define ESF_DZ_RX_USR_64KBPS_BUF_ID_LBN 16 +#define ESF_DZ_RX_USR_64KBPS_BUF_ID_WIDTH 28 +#define ESF_DZ_RX_USR_1MBPS_BUF_ID_LBN 20 +#define ESF_DZ_RX_USR_1MBPS_BUF_ID_WIDTH 24 +#define ESF_DZ_RX_USR_4MBPS_BUF_ID_LBN 22 +#define ESF_DZ_RX_USR_4MBPS_BUF_ID_WIDTH 22 +#define ESF_DZ_RX_USR_4MBPS_BYTE_OFFSET_LBN 0 +#define ESF_DZ_RX_USR_4MBPS_BYTE_OFFSET_WIDTH 22 +#define ESF_DZ_RX_USR_1MBPS_BYTE_OFFSET_LBN 0 +#define ESF_DZ_RX_USR_1MBPS_BYTE_OFFSET_WIDTH 20 +#define ESF_DZ_RX_USR_64KBPS_BYTE_OFFSET_LBN 0 +#define ESF_DZ_RX_USR_64KBPS_BYTE_OFFSET_WIDTH 16 +#define ESF_DZ_RX_USR_4KBPS_BYTE_OFFSET_LBN 0 +#define ESF_DZ_RX_USR_4KBPS_BYTE_OFFSET_WIDTH 12 + +/* TX_CSUM_TSTAMP_DESC */ +#define ESF_DZ_TX_DESC_IS_OPT_LBN 63 +#define ESF_DZ_TX_DESC_IS_OPT_WIDTH 1 +#define ESF_DZ_TX_OPTION_TYPE_LBN 60 +#define ESF_DZ_TX_OPTION_TYPE_WIDTH 3 +#define ESE_DZ_TX_OPTION_DESC_TSO 7 +#define ESE_DZ_TX_OPTION_DESC_VLAN 6 +#define ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0 +#define ESF_DZ_TX_TIMESTAMP_LBN 5 +#define ESF_DZ_TX_TIMESTAMP_WIDTH 1 +#define ESF_DZ_TX_OPTION_CRC_MODE_LBN 2 +#define ESF_DZ_TX_OPTION_CRC_MODE_WIDTH 3 +#define ESE_DZ_TX_OPTION_CRC_FCOIP_MPA 5 +#define ESE_DZ_TX_OPTION_CRC_FCOIP_FCOE 4 +#define ESE_DZ_TX_OPTION_CRC_ISCSI_HDR_AND_PYLD 3 +#define ESE_DZ_TX_OPTION_CRC_ISCSI_HDR 2 +#define ESE_DZ_TX_OPTION_CRC_FCOE 1 +#define ESE_DZ_TX_OPTION_CRC_OFF 0 +#define ESF_DZ_TX_OPTION_UDP_TCP_CSUM_LBN 1 +#define ESF_DZ_TX_OPTION_UDP_TCP_CSUM_WIDTH 1 +#define ESF_DZ_TX_OPTION_IP_CSUM_LBN 0 +#define ESF_DZ_TX_OPTION_IP_CSUM_WIDTH 1 + +/* TX_EVENT */ +#define ESF_DZ_TX_CODE_LBN 60 +#define ESF_DZ_TX_CODE_WIDTH 4 +#define ESF_DZ_TX_OVERRIDE_HOLDOFF_LBN 59 +#define ESF_DZ_TX_OVERRIDE_HOLDOFF_WIDTH 1 +#define ESF_DZ_TX_DROP_EVENT_LBN 58 +#define ESF_DZ_TX_DROP_EVENT_WIDTH 1 +#define ESF_DZ_TX_EV_RSVD_LBN 48 +#define ESF_DZ_TX_EV_RSVD_WIDTH 10 +#define ESF_DZ_TX_SOFT2_LBN 32 +#define ESF_DZ_TX_SOFT2_WIDTH 16 +#define ESF_DZ_TX_CAN_MERGE_LBN 31 +#define ESF_DZ_TX_CAN_MERGE_WIDTH 1 +#define ESF_DZ_TX_SOFT1_LBN 24 +#define ESF_DZ_TX_SOFT1_WIDTH 7 +#define ESF_DZ_TX_QLABEL_LBN 16 +#define ESF_DZ_TX_QLABEL_WIDTH 5 +#define ESF_DZ_TX_DESCR_INDX_LBN 0 +#define ESF_DZ_TX_DESCR_INDX_WIDTH 16 + +/* TX_KER_DESC */ +#define ESF_DZ_TX_KER_TYPE_LBN 63 +#define ESF_DZ_TX_KER_TYPE_WIDTH 1 +#define ESF_DZ_TX_KER_CONT_LBN 62 +#define ESF_DZ_TX_KER_CONT_WIDTH 1 +#define ESF_DZ_TX_KER_BYTE_CNT_LBN 48 +#define ESF_DZ_TX_KER_BYTE_CNT_WIDTH 14 +#define ESF_DZ_TX_KER_BUF_ADDR_LBN 0 +#define ESF_DZ_TX_KER_BUF_ADDR_WIDTH 48 + +/* TX_PIO_DESC */ +#define ESF_DZ_TX_PIO_TYPE_LBN 63 +#define ESF_DZ_TX_PIO_TYPE_WIDTH 1 +#define ESF_DZ_TX_PIO_OPT_LBN 60 +#define ESF_DZ_TX_PIO_OPT_WIDTH 3 +#define ESF_DZ_TX_PIO_CONT_LBN 59 +#define ESF_DZ_TX_PIO_CONT_WIDTH 1 +#define ESF_DZ_TX_PIO_BYTE_CNT_LBN 32 +#define ESF_DZ_TX_PIO_BYTE_CNT_WIDTH 12 +#define ESF_DZ_TX_PIO_BUF_ADDR_LBN 0 +#define ESF_DZ_TX_PIO_BUF_ADDR_WIDTH 12 + +/* TX_TSO_DESC */ +#define ESF_DZ_TX_DESC_IS_OPT_LBN 63 +#define ESF_DZ_TX_DESC_IS_OPT_WIDTH 1 +#define ESF_DZ_TX_OPTION_TYPE_LBN 60 +#define ESF_DZ_TX_OPTION_TYPE_WIDTH 3 +#define ESE_DZ_TX_OPTION_DESC_TSO 7 +#define ESE_DZ_TX_OPTION_DESC_VLAN 6 +#define ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0 +#define ESF_DZ_TX_TSO_TCP_FLAGS_LBN 48 +#define ESF_DZ_TX_TSO_TCP_FLAGS_WIDTH 8 +#define ESF_DZ_TX_TSO_IP_ID_LBN 32 +#define ESF_DZ_TX_TSO_IP_ID_WIDTH 16 +#define ESF_DZ_TX_TSO_TCP_SEQNO_LBN 0 +#define ESF_DZ_TX_TSO_TCP_SEQNO_WIDTH 32 + +/* TX_USER_DESC */ +#define ESF_DZ_TX_USR_TYPE_LBN 63 +#define ESF_DZ_TX_USR_TYPE_WIDTH 1 +#define ESF_DZ_TX_USR_CONT_LBN 62 +#define ESF_DZ_TX_USR_CONT_WIDTH 1 +#define ESF_DZ_TX_USR_BYTE_CNT_LBN 48 +#define ESF_DZ_TX_USR_BYTE_CNT_WIDTH 14 +#define ESF_DZ_TX_USR_BUF_PAGE_SIZE_LBN 44 +#define ESF_DZ_TX_USR_BUF_PAGE_SIZE_WIDTH 4 +#define ESE_DZ_USR_BUF_PAGE_SZ_4MB 10 +#define ESE_DZ_USR_BUF_PAGE_SZ_1MB 8 +#define ESE_DZ_USR_BUF_PAGE_SZ_64KB 4 +#define ESE_DZ_USR_BUF_PAGE_SZ_4KB 0 +#define ESF_DZ_TX_USR_BUF_ID_OFFSET_LBN 0 +#define ESF_DZ_TX_USR_BUF_ID_OFFSET_WIDTH 44 +#define ESF_DZ_TX_USR_4KBPS_BUF_ID_LBN 12 +#define ESF_DZ_TX_USR_4KBPS_BUF_ID_WIDTH 32 +#define ESF_DZ_TX_USR_64KBPS_BUF_ID_LBN 16 +#define ESF_DZ_TX_USR_64KBPS_BUF_ID_WIDTH 28 +#define ESF_DZ_TX_USR_1MBPS_BUF_ID_LBN 20 +#define ESF_DZ_TX_USR_1MBPS_BUF_ID_WIDTH 24 +#define ESF_DZ_TX_USR_4MBPS_BUF_ID_LBN 22 +#define ESF_DZ_TX_USR_4MBPS_BUF_ID_WIDTH 22 +#define ESF_DZ_TX_USR_4MBPS_BYTE_OFFSET_LBN 0 +#define ESF_DZ_TX_USR_4MBPS_BYTE_OFFSET_WIDTH 22 +#define ESF_DZ_TX_USR_1MBPS_BYTE_OFFSET_LBN 0 +#define ESF_DZ_TX_USR_1MBPS_BYTE_OFFSET_WIDTH 20 +#define ESF_DZ_TX_USR_64KBPS_BYTE_OFFSET_LBN 0 +#define ESF_DZ_TX_USR_64KBPS_BYTE_OFFSET_WIDTH 16 +#define ESF_DZ_TX_USR_4KBPS_BYTE_OFFSET_LBN 0 +#define ESF_DZ_TX_USR_4KBPS_BYTE_OFFSET_WIDTH 12 +/*************************************************************************/ + +/* TX_DESC_UPD_REG: Transmit descriptor update register. + * We may write just one dword of these registers. + */ +#define ER_DZ_TX_DESC_UPD_DWORD (ER_DZ_TX_DESC_UPD + 2 * 4) +#define ERF_DZ_TX_DESC_WPTR_DWORD_LBN (ERF_DZ_TX_DESC_WPTR_LBN - 2 * 32) +#define ERF_DZ_TX_DESC_WPTR_DWORD_WIDTH ERF_DZ_TX_DESC_WPTR_WIDTH + +/* The workaround for bug 35388 requires multiplexing writes through + * the TX_DESC_UPD_DWORD address. + * TX_DESC_UPD: 0ppppppppppp (bit 11 lost) + * EVQ_RPTR: 1000hhhhhhhh, 1001llllllll (split into high and low bits) + * EVQ_TMR: 11mmvvvvvvvv (bits 8:13 of value lost) + */ +#define ER_DD_EVQ_INDIRECT ER_DZ_TX_DESC_UPD_DWORD +#define ERF_DD_EVQ_IND_RPTR_FLAGS_LBN 8 +#define ERF_DD_EVQ_IND_RPTR_FLAGS_WIDTH 4 +#define EFE_DD_EVQ_IND_RPTR_FLAGS_HIGH 8 +#define EFE_DD_EVQ_IND_RPTR_FLAGS_LOW 9 +#define ERF_DD_EVQ_IND_RPTR_LBN 0 +#define ERF_DD_EVQ_IND_RPTR_WIDTH 8 +#define ERF_DD_EVQ_IND_TIMER_FLAGS_LBN 10 +#define ERF_DD_EVQ_IND_TIMER_FLAGS_WIDTH 2 +#define EFE_DD_EVQ_IND_TIMER_FLAGS 3 +#define ERF_DD_EVQ_IND_TIMER_MODE_LBN 8 +#define ERF_DD_EVQ_IND_TIMER_MODE_WIDTH 2 +#define ERF_DD_EVQ_IND_TIMER_VAL_LBN 0 +#define ERF_DD_EVQ_IND_TIMER_VAL_WIDTH 8 + +/* TX_PIOBUF + * PIO buffer aperture (paged) + */ +#define ER_DZ_TX_PIOBUF 4096 +#define ER_DZ_TX_PIOBUF_SIZE 2048 + +/* RX packet prefix */ +#define ES_DZ_RX_PREFIX_HASH_OFST 0 +#define ES_DZ_RX_PREFIX_VLAN1_OFST 4 +#define ES_DZ_RX_PREFIX_VLAN2_OFST 6 +#define ES_DZ_RX_PREFIX_PKTLEN_OFST 8 +#define ES_DZ_RX_PREFIX_TSTAMP_OFST 10 +#define ES_DZ_RX_PREFIX_SIZE 14 + +#endif /* EFX_EF10_REGS_H */ diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index db2f119d7fec..07c9bc4c61bc 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2005-2011 Solarflare Communications Inc. + * Copyright 2005-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -189,7 +189,7 @@ MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); * *************************************************************************/ -static void efx_soft_enable_interrupts(struct efx_nic *efx); +static int efx_soft_enable_interrupts(struct efx_nic *efx); static void efx_soft_disable_interrupts(struct efx_nic *efx); static void efx_remove_channel(struct efx_channel *channel); static void efx_remove_channels(struct efx_nic *efx); @@ -329,15 +329,23 @@ static int efx_probe_eventq(struct efx_channel *channel) } /* Prepare channel's event queue */ -static void efx_init_eventq(struct efx_channel *channel) +static int efx_init_eventq(struct efx_channel *channel) { - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "chan %d init event queue\n", channel->channel); + struct efx_nic *efx = channel->efx; + int rc; + + EFX_WARN_ON_PARANOID(channel->eventq_init); - channel->eventq_read_ptr = 0; + netif_dbg(efx, drv, efx->net_dev, + "chan %d init event queue\n", channel->channel); - efx_nic_init_eventq(channel); - channel->eventq_init = true; + rc = efx_nic_init_eventq(channel); + if (rc == 0) { + efx->type->push_irq_moderation(channel); + channel->eventq_read_ptr = 0; + channel->eventq_init = true; + } + return rc; } /* Enable event queue processing and NAPI */ @@ -579,7 +587,7 @@ static void efx_start_datapath(struct efx_nic *efx) rx_buf_len = (sizeof(struct efx_rx_page_state) + NET_IP_ALIGN + efx->rx_dma_len); if (rx_buf_len <= PAGE_SIZE) { - efx->rx_scatter = false; + efx->rx_scatter = efx->type->always_rx_scatter; efx->rx_buffer_order = 0; } else if (efx->type->can_rx_scatter) { BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); @@ -607,7 +615,7 @@ static void efx_start_datapath(struct efx_nic *efx) efx->rx_dma_len, efx->rx_page_buf_step, efx->rx_bufs_per_page, efx->rx_pages_per_batch); - /* RX filters also have scatter-enabled flags */ + /* RX filters may also have scatter-enabled flags */ if (efx->rx_scatter != old_rx_scatter) efx->type->filter_update_rx_scatter(efx); @@ -623,11 +631,14 @@ static void efx_start_datapath(struct efx_nic *efx) /* Initialise the channels */ efx_for_each_channel(channel, efx) { - efx_for_each_channel_tx_queue(tx_queue, channel) + efx_for_each_channel_tx_queue(tx_queue, channel) { efx_init_tx_queue(tx_queue); + atomic_inc(&efx->active_queues); + } efx_for_each_channel_rx_queue(rx_queue, channel) { efx_init_rx_queue(rx_queue); + atomic_inc(&efx->active_queues); efx_nic_generate_fill_event(rx_queue); } @@ -722,7 +733,7 @@ efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; u32 old_rxq_entries, old_txq_entries; unsigned i, next_buffer_table = 0; - int rc; + int rc, rc2; rc = efx_check_disabled(efx); if (rc) @@ -802,9 +813,16 @@ out: } } - efx_soft_enable_interrupts(efx); - efx_start_all(efx); - netif_device_attach(efx->net_dev); + rc2 = efx_soft_enable_interrupts(efx); + if (rc2) { + rc = rc ? rc : rc2; + netif_err(efx, drv, efx->net_dev, + "unable to restart interrupts on channel reallocation\n"); + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + } else { + efx_start_all(efx); + netif_device_attach(efx->net_dev); + } return rc; rollback: @@ -1327,9 +1345,10 @@ static int efx_probe_interrupts(struct efx_nic *efx) return 0; } -static void efx_soft_enable_interrupts(struct efx_nic *efx) +static int efx_soft_enable_interrupts(struct efx_nic *efx) { - struct efx_channel *channel; + struct efx_channel *channel, *end_channel; + int rc; BUG_ON(efx->state == STATE_DISABLED); @@ -1337,12 +1356,28 @@ static void efx_soft_enable_interrupts(struct efx_nic *efx) smp_wmb(); efx_for_each_channel(channel, efx) { - if (!channel->type->keep_eventq) - efx_init_eventq(channel); + if (!channel->type->keep_eventq) { + rc = efx_init_eventq(channel); + if (rc) + goto fail; + } efx_start_eventq(channel); } efx_mcdi_mode_event(efx); + + return 0; +fail: + end_channel = channel; + efx_for_each_channel(channel, efx) { + if (channel == end_channel) + break; + efx_stop_eventq(channel); + if (!channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + return rc; } static void efx_soft_disable_interrupts(struct efx_nic *efx) @@ -1368,11 +1403,15 @@ static void efx_soft_disable_interrupts(struct efx_nic *efx) if (!channel->type->keep_eventq) efx_fini_eventq(channel); } + + /* Flush the asynchronous MCDI request queue */ + efx_mcdi_flush_async(efx); } -static void efx_enable_interrupts(struct efx_nic *efx) +static int efx_enable_interrupts(struct efx_nic *efx) { - struct efx_channel *channel; + struct efx_channel *channel, *end_channel; + int rc; BUG_ON(efx->state == STATE_DISABLED); @@ -1384,11 +1423,31 @@ static void efx_enable_interrupts(struct efx_nic *efx) efx->type->irq_enable_master(efx); efx_for_each_channel(channel, efx) { + if (channel->type->keep_eventq) { + rc = efx_init_eventq(channel); + if (rc) + goto fail; + } + } + + rc = efx_soft_enable_interrupts(efx); + if (rc) + goto fail; + + return 0; + +fail: + end_channel = channel; + efx_for_each_channel(channel, efx) { + if (channel == end_channel) + break; if (channel->type->keep_eventq) - efx_init_eventq(channel); + efx_fini_eventq(channel); } - efx_soft_enable_interrupts(efx); + efx->type->irq_disable_non_ev(efx); + + return rc; } static void efx_disable_interrupts(struct efx_nic *efx) @@ -1459,9 +1518,11 @@ static int efx_probe_nic(struct efx_nic *efx) * in MSI-X interrupts. */ rc = efx_probe_interrupts(efx); if (rc) - goto fail; + goto fail1; - efx->type->dimension_resources(efx); + rc = efx->type->dimension_resources(efx); + if (rc) + goto fail2; if (efx->n_channels > 1) get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key)); @@ -1479,7 +1540,9 @@ static int efx_probe_nic(struct efx_nic *efx) return 0; -fail: +fail2: + efx_remove_interrupts(efx); +fail1: efx->type->remove(efx); return rc; } @@ -2012,7 +2075,7 @@ static int efx_set_features(struct net_device *net_dev, netdev_features_t data) return 0; } -static const struct net_device_ops efx_netdev_ops = { +static const struct net_device_ops efx_farch_netdev_ops = { .ndo_open = efx_net_open, .ndo_stop = efx_net_stop, .ndo_get_stats64 = efx_net_stats, @@ -2039,6 +2102,26 @@ static const struct net_device_ops efx_netdev_ops = { #endif }; +static const struct net_device_ops efx_ef10_netdev_ops = { + .ndo_open = efx_net_open, + .ndo_stop = efx_net_stop, + .ndo_get_stats64 = efx_net_stats, + .ndo_tx_timeout = efx_watchdog, + .ndo_start_xmit = efx_hard_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_do_ioctl = efx_ioctl, + .ndo_change_mtu = efx_change_mtu, + .ndo_set_mac_address = efx_set_mac_address, + .ndo_set_rx_mode = efx_set_rx_mode, + .ndo_set_features = efx_set_features, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = efx_netpoll, +#endif +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = efx_filter_rfs, +#endif +}; + static void efx_update_name(struct efx_nic *efx) { strcpy(efx->name, efx->net_dev->name); @@ -2051,7 +2134,8 @@ static int efx_netdev_event(struct notifier_block *this, { struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); - if (net_dev->netdev_ops == &efx_netdev_ops && + if ((net_dev->netdev_ops == &efx_farch_netdev_ops || + net_dev->netdev_ops == &efx_ef10_netdev_ops) && event == NETDEV_CHANGENAME) efx_update_name(netdev_priv(net_dev)); @@ -2078,7 +2162,12 @@ static int efx_register_netdev(struct efx_nic *efx) net_dev->watchdog_timeo = 5 * HZ; net_dev->irq = efx->pci_dev->irq; - net_dev->netdev_ops = &efx_netdev_ops; + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) { + net_dev->netdev_ops = &efx_ef10_netdev_ops; + net_dev->priv_flags |= IFF_UNICAST_FLT; + } else { + net_dev->netdev_ops = &efx_farch_netdev_ops; + } SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops); net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; @@ -2202,7 +2291,9 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) "could not restore PHY settings\n"); } - efx_enable_interrupts(efx); + rc = efx_enable_interrupts(efx); + if (rc) + goto fail; efx_restore_filters(efx); efx_sriov_reset(efx); @@ -2398,6 +2489,8 @@ static DEFINE_PCI_DEVICE_TABLE(efx_pci_table) = { .driver_data = (unsigned long) &siena_a0_nic_type}, {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813), /* SFL9021 */ .driver_data = (unsigned long) &siena_a0_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903), /* SFC9120 PF */ + .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, {0} /* end of list */ }; @@ -2646,10 +2739,14 @@ static int efx_pci_probe_main(struct efx_nic *efx) rc = efx_nic_init_interrupt(efx); if (rc) goto fail5; - efx_enable_interrupts(efx); + rc = efx_enable_interrupts(efx); + if (rc) + goto fail6; return 0; + fail6: + efx_nic_fini_interrupt(efx); fail5: efx_fini_port(efx); fail4: @@ -2777,12 +2874,15 @@ static int efx_pm_freeze(struct device *dev) static int efx_pm_thaw(struct device *dev) { + int rc; struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); rtnl_lock(); if (efx->state != STATE_DISABLED) { - efx_enable_interrupts(efx); + rc = efx_enable_interrupts(efx); + if (rc) + goto fail; mutex_lock(&efx->mac_lock); efx->phy_op->reconfigure(efx); @@ -2803,6 +2903,11 @@ static int efx_pm_thaw(struct device *dev) queue_work(reset_workqueue, &efx->reset_work); return 0; + +fail: + rtnl_unlock(); + + return rc; } static int efx_pm_poweroff(struct device *dev) @@ -2839,8 +2944,8 @@ static int efx_pm_resume(struct device *dev) rc = efx->type->init(efx); if (rc) return rc; - efx_pm_thaw(dev); - return 0; + rc = efx_pm_thaw(dev); + return rc; } static int efx_pm_suspend(struct device *dev) diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 3bbc047baea2..34d00f5771fe 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -79,13 +79,20 @@ extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); * On success, return the filter ID. * On failure, return a negative error code. * - * If an existing filter has equal match values to the new filter - * spec, then the new filter might replace it, depending on the - * relative priorities. If the existing filter has lower priority, or - * if @replace_equal is set and it has equal priority, then it is - * replaced. Otherwise the function fails, returning -%EPERM if - * the existing filter has higher priority or -%EEXIST if it has - * equal priority. + * If existing filters have equal match values to the new filter spec, + * then the new filter might replace them or the function might fail, + * as follows. + * + * 1. If the existing filters have lower priority, or @replace_equal + * is set and they have equal priority, replace them. + * + * 2. If the existing filters have higher priority, return -%EPERM. + * + * 3. If !efx_filter_is_mc_recipient(@spec), or the NIC does not + * support delivery to multiple recipients, return -%EEXIST. + * + * This implies that filters for multiple multicast recipients must + * all be inserted with the same priority and @replace_equal = %false. */ static inline s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, @@ -169,6 +176,7 @@ static inline void efx_filter_rfs_expire(struct efx_channel *channel) static inline void efx_filter_rfs_expire(struct efx_channel *channel) {} #define efx_filter_rfs_enabled() 0 #endif +extern bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); /* Channels */ extern int efx_channel_dummy_op_int(struct efx_channel *channel); diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h index 8665921d7170..7fdfee019092 100644 --- a/drivers/net/ethernet/sfc/enum.h +++ b/drivers/net/ethernet/sfc/enum.h @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2007-2009 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2007-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 63546930f954..5b471cf5c323 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -77,6 +77,8 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_nodesc_trunc), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), }; #define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index ec77611a52e4..8685f99d872a 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -2174,10 +2174,11 @@ out: return rc; } -static void falcon_dimension_resources(struct efx_nic *efx) +static int falcon_dimension_resources(struct efx_nic *efx) { efx->rx_dc_base = 0x20000; efx->tx_dc_base = 0x26000; + return 0; } /* Probe all SPI devices on the NIC */ diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c index ec1e99d0dcad..1736f4b806af 100644 --- a/drivers/net/ethernet/sfc/falcon_boards.c +++ b/drivers/net/ethernet/sfc/falcon_boards.c @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2007-2010 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2007-2012 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index d21483dfea40..c0907d884d75 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2011 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -325,6 +325,8 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue) txd = efx_tx_desc(tx_queue, write_ptr); ++tx_queue->write_count; + EFX_BUG_ON_PARANOID(buffer->flags & EFX_TX_BUF_OPTION); + /* Create TX descriptor ring entry */ BUILD_BUG_ON(EFX_TX_BUF_CONT != 1); EFX_POPULATE_QWORD_4(*txd, @@ -594,7 +596,7 @@ static bool efx_farch_flush_wake(struct efx_nic *efx) /* Ensure that all updates are visible to efx_farch_flush_queues() */ smp_mb(); - return (atomic_read(&efx->drain_pending) == 0 || + return (atomic_read(&efx->active_queues) == 0 || (atomic_read(&efx->rxq_flush_outstanding) < EFX_RX_FLUSH_COUNT && atomic_read(&efx->rxq_flush_pending) > 0)); } @@ -626,7 +628,7 @@ static bool efx_check_tx_flush_complete(struct efx_nic *efx) netif_dbg(efx, hw, efx->net_dev, "flush complete on TXQ %d, so drain " "the queue\n", tx_queue->queue); - /* Don't need to increment drain_pending as it + /* Don't need to increment active_queues as it * has already been incremented for the queues * which did not drain */ @@ -653,17 +655,15 @@ static int efx_farch_do_flush(struct efx_nic *efx) efx_for_each_channel(channel, efx) { efx_for_each_channel_tx_queue(tx_queue, channel) { - atomic_inc(&efx->drain_pending); efx_farch_flush_tx_queue(tx_queue); } efx_for_each_channel_rx_queue(rx_queue, channel) { - atomic_inc(&efx->drain_pending); rx_queue->flush_pending = true; atomic_inc(&efx->rxq_flush_pending); } } - while (timeout && atomic_read(&efx->drain_pending) > 0) { + while (timeout && atomic_read(&efx->active_queues) > 0) { /* If SRIOV is enabled, then offload receive queue flushing to * the firmware (though we will still have to poll for * completion). If that fails, fall back to the old scheme. @@ -699,15 +699,15 @@ static int efx_farch_do_flush(struct efx_nic *efx) timeout); } - if (atomic_read(&efx->drain_pending) && + if (atomic_read(&efx->active_queues) && !efx_check_tx_flush_complete(efx)) { netif_err(efx, hw, efx->net_dev, "failed to flush %d queues " - "(rx %d+%d)\n", atomic_read(&efx->drain_pending), + "(rx %d+%d)\n", atomic_read(&efx->active_queues), atomic_read(&efx->rxq_flush_outstanding), atomic_read(&efx->rxq_flush_pending)); rc = -ETIMEDOUT; - atomic_set(&efx->drain_pending, 0); + atomic_set(&efx->active_queues, 0); atomic_set(&efx->rxq_flush_pending, 0); atomic_set(&efx->rxq_flush_outstanding, 0); } @@ -1123,8 +1123,8 @@ efx_farch_handle_drain_event(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; - WARN_ON(atomic_read(&efx->drain_pending) == 0); - atomic_dec(&efx->drain_pending); + WARN_ON(atomic_read(&efx->active_queues) == 0); + atomic_dec(&efx->active_queues); if (efx_farch_flush_wake(efx)) wake_up(&efx->flush_wq); } @@ -1325,7 +1325,7 @@ int efx_farch_ev_probe(struct efx_channel *channel) entries * sizeof(efx_qword_t)); } -void efx_farch_ev_init(struct efx_channel *channel) +int efx_farch_ev_init(struct efx_channel *channel) { efx_oword_t reg; struct efx_nic *efx = channel->efx; @@ -1357,7 +1357,7 @@ void efx_farch_ev_init(struct efx_channel *channel) efx_writeo_table(efx, ®, efx->type->evq_ptr_tbl_base, channel->channel); - efx->type->push_irq_moderation(channel); + return 0; } void efx_farch_ev_fini(struct efx_channel *channel) diff --git a/drivers/net/ethernet/sfc/farch_regs.h b/drivers/net/ethernet/sfc/farch_regs.h index 491b1039006b..7019a712e799 100644 --- a/drivers/net/ethernet/sfc/farch_regs.h +++ b/drivers/net/ethernet/sfc/farch_regs.h @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2012 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h index e459e43a2798..63c77a557178 100644 --- a/drivers/net/ethernet/sfc/filter.h +++ b/drivers/net/ethernet/sfc/filter.h @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2005-2010 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2005-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/io.h b/drivers/net/ethernet/sfc/io.h index 19e8b95b7af6..96ce507d8602 100644 --- a/drivers/net/ethernet/sfc/io.h +++ b/drivers/net/ethernet/sfc/io.h @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -20,7 +20,7 @@ * ************************************************************************** * - * Notes on locking strategy: + * Notes on locking strategy for the Falcon architecture: * * Many CSRs are very wide and cannot be read or written atomically. * Writes from the host are buffered by the Bus Interface Unit (BIU) @@ -54,6 +54,12 @@ * register while the collector already holds values for some other * register, the write is discarded and the collector maintains its * current state. + * + * The EF10 architecture exposes very few registers to the host and + * most of them are only 32 bits wide. The only exceptions are the MC + * doorbell register pair, which has its own latching, and + * TX_DESC_UPD, which works in a similar way to the Falcon + * architecture. */ #if BITS_PER_LONG == 64 @@ -237,8 +243,8 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, BUILD_BUG_ON_ZERO((reg) != 0x830 && (reg) != 0xa10), \ page) -/* Write a page-mapped 32-bit CSR (EVQ_RPTR or the high bits of - * RX_DESC_UPD or TX_DESC_UPD) +/* Write a page-mapped 32-bit CSR (EVQ_RPTR, EVQ_TMR (EF10), or the + * high bits of RX_DESC_UPD or TX_DESC_UPD) */ static inline void _efx_writed_page(struct efx_nic *efx, const efx_dword_t *value, @@ -249,8 +255,12 @@ _efx_writed_page(struct efx_nic *efx, const efx_dword_t *value, #define efx_writed_page(efx, value, reg, page) \ _efx_writed_page(efx, value, \ reg + \ - BUILD_BUG_ON_ZERO((reg) != 0x400 && (reg) != 0x83c \ - && (reg) != 0xa1c), \ + BUILD_BUG_ON_ZERO((reg) != 0x400 && \ + (reg) != 0x420 && \ + (reg) != 0x830 && \ + (reg) != 0x83c && \ + (reg) != 0xa18 && \ + (reg) != 0xa1c), \ page) /* Write TIMER_COMMAND. This is a page-mapped 32-bit CSR, but a bug diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 63121adbc3bb..128d7cdf9eb2 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2008-2011 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2008-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -8,6 +8,7 @@ */ #include <linux/delay.h> +#include <asm/cmpxchg.h> #include "net_driver.h" #include "nic.h" #include "io.h" @@ -36,6 +37,20 @@ #define SEQ_MASK \ EFX_MASK32(EFX_WIDTH(MCDI_HEADER_SEQ)) +struct efx_mcdi_async_param { + struct list_head list; + unsigned int cmd; + size_t inlen; + size_t outlen; + efx_mcdi_async_completer *complete; + unsigned long cookie; + /* followed by request/response buffer */ +}; + +static void efx_mcdi_timeout_async(unsigned long context); +static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, + bool *was_attached_out); + static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) { EFX_BUG_ON_PARANOID(!efx->mcdi); @@ -45,40 +60,76 @@ static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) int efx_mcdi_init(struct efx_nic *efx) { struct efx_mcdi_iface *mcdi; + bool already_attached; + int rc; efx->mcdi = kzalloc(sizeof(*efx->mcdi), GFP_KERNEL); if (!efx->mcdi) return -ENOMEM; mcdi = efx_mcdi(efx); + mcdi->efx = efx; init_waitqueue_head(&mcdi->wq); spin_lock_init(&mcdi->iface_lock); - atomic_set(&mcdi->state, MCDI_STATE_QUIESCENT); + mcdi->state = MCDI_STATE_QUIESCENT; mcdi->mode = MCDI_MODE_POLL; + spin_lock_init(&mcdi->async_lock); + INIT_LIST_HEAD(&mcdi->async_list); + setup_timer(&mcdi->async_timer, efx_mcdi_timeout_async, + (unsigned long)mcdi); (void) efx_mcdi_poll_reboot(efx); mcdi->new_epoch = true; /* Recover from a failed assertion before probing */ - return efx_mcdi_handle_assertion(efx); + rc = efx_mcdi_handle_assertion(efx); + if (rc) + return rc; + + /* Let the MC (and BMC, if this is a LOM) know that the driver + * is loaded. We should do this before we reset the NIC. + */ + rc = efx_mcdi_drv_attach(efx, true, &already_attached); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "Unable to register driver with MCPU\n"); + return rc; + } + if (already_attached) + /* Not a fatal error */ + netif_err(efx, probe, efx->net_dev, + "Host already registered with MCPU\n"); + + return 0; } void efx_mcdi_fini(struct efx_nic *efx) { - BUG_ON(efx->mcdi && - atomic_read(&efx->mcdi->iface.state) != MCDI_STATE_QUIESCENT); + if (!efx->mcdi) + return; + + BUG_ON(efx->mcdi->iface.state != MCDI_STATE_QUIESCENT); + + /* Relinquish the device (back to the BMC, if this is a LOM) */ + efx_mcdi_drv_attach(efx, false, NULL); + kfree(efx->mcdi); } -static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, - const efx_dword_t *inbuf, size_t inlen) +static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd, + const efx_dword_t *inbuf, size_t inlen) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); efx_dword_t hdr[2]; size_t hdr_len; u32 xflags, seqno; - BUG_ON(atomic_read(&mcdi->state) == MCDI_STATE_QUIESCENT); + BUG_ON(mcdi->state == MCDI_STATE_QUIESCENT); + + /* Serialise with efx_mcdi_ev_cpl() and efx_mcdi_ev_death() */ + spin_lock_bh(&mcdi->iface_lock); + ++mcdi->seqno; + spin_unlock_bh(&mcdi->iface_lock); seqno = mcdi->seqno & SEQ_MASK; xflags = 0; @@ -114,6 +165,8 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, } efx->type->mcdi_request(efx, hdr, hdr_len, inbuf, inlen); + + mcdi->new_epoch = false; } static int efx_mcdi_errno(unsigned int mcdi_err) @@ -246,25 +299,30 @@ int efx_mcdi_poll_reboot(struct efx_nic *efx) return efx->type->mcdi_poll_reboot(efx); } -static void efx_mcdi_acquire(struct efx_mcdi_iface *mcdi) +static bool efx_mcdi_acquire_async(struct efx_mcdi_iface *mcdi) +{ + return cmpxchg(&mcdi->state, + MCDI_STATE_QUIESCENT, MCDI_STATE_RUNNING_ASYNC) == + MCDI_STATE_QUIESCENT; +} + +static void efx_mcdi_acquire_sync(struct efx_mcdi_iface *mcdi) { /* Wait until the interface becomes QUIESCENT and we win the race - * to mark it RUNNING. */ + * to mark it RUNNING_SYNC. + */ wait_event(mcdi->wq, - atomic_cmpxchg(&mcdi->state, - MCDI_STATE_QUIESCENT, - MCDI_STATE_RUNNING) - == MCDI_STATE_QUIESCENT); + cmpxchg(&mcdi->state, + MCDI_STATE_QUIESCENT, MCDI_STATE_RUNNING_SYNC) == + MCDI_STATE_QUIESCENT); } static int efx_mcdi_await_completion(struct efx_nic *efx) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - if (wait_event_timeout( - mcdi->wq, - atomic_read(&mcdi->state) == MCDI_STATE_COMPLETED, - MCDI_RPC_TIMEOUT) == 0) + if (wait_event_timeout(mcdi->wq, mcdi->state == MCDI_STATE_COMPLETED, + MCDI_RPC_TIMEOUT) == 0) return -ETIMEDOUT; /* Check if efx_mcdi_set_mode() switched us back to polled completions. @@ -281,17 +339,14 @@ static int efx_mcdi_await_completion(struct efx_nic *efx) return 0; } -static bool efx_mcdi_complete(struct efx_mcdi_iface *mcdi) +/* If the interface is RUNNING_SYNC, switch to COMPLETED and wake the + * requester. Return whether this was done. Does not take any locks. + */ +static bool efx_mcdi_complete_sync(struct efx_mcdi_iface *mcdi) { - /* If the interface is RUNNING, then move to COMPLETED and wake any - * waiters. If the interface isn't in RUNNING then we've received a - * duplicate completion after we've already transitioned back to - * QUIESCENT. [A subsequent invocation would increment seqno, so would - * have failed the seqno check]. - */ - if (atomic_cmpxchg(&mcdi->state, - MCDI_STATE_RUNNING, - MCDI_STATE_COMPLETED) == MCDI_STATE_RUNNING) { + if (cmpxchg(&mcdi->state, + MCDI_STATE_RUNNING_SYNC, MCDI_STATE_COMPLETED) == + MCDI_STATE_RUNNING_SYNC) { wake_up(&mcdi->wq); return true; } @@ -301,10 +356,91 @@ static bool efx_mcdi_complete(struct efx_mcdi_iface *mcdi) static void efx_mcdi_release(struct efx_mcdi_iface *mcdi) { - atomic_set(&mcdi->state, MCDI_STATE_QUIESCENT); + if (mcdi->mode == MCDI_MODE_EVENTS) { + struct efx_mcdi_async_param *async; + struct efx_nic *efx = mcdi->efx; + + /* Process the asynchronous request queue */ + spin_lock_bh(&mcdi->async_lock); + async = list_first_entry_or_null( + &mcdi->async_list, struct efx_mcdi_async_param, list); + if (async) { + mcdi->state = MCDI_STATE_RUNNING_ASYNC; + efx_mcdi_send_request(efx, async->cmd, + (const efx_dword_t *)(async + 1), + async->inlen); + mod_timer(&mcdi->async_timer, + jiffies + MCDI_RPC_TIMEOUT); + } + spin_unlock_bh(&mcdi->async_lock); + + if (async) + return; + } + + mcdi->state = MCDI_STATE_QUIESCENT; wake_up(&mcdi->wq); } +/* If the interface is RUNNING_ASYNC, switch to COMPLETED, call the + * asynchronous completion function, and release the interface. + * Return whether this was done. Must be called in bh-disabled + * context. Will take iface_lock and async_lock. + */ +static bool efx_mcdi_complete_async(struct efx_mcdi_iface *mcdi, bool timeout) +{ + struct efx_nic *efx = mcdi->efx; + struct efx_mcdi_async_param *async; + size_t hdr_len, data_len; + efx_dword_t *outbuf; + int rc; + + if (cmpxchg(&mcdi->state, + MCDI_STATE_RUNNING_ASYNC, MCDI_STATE_COMPLETED) != + MCDI_STATE_RUNNING_ASYNC) + return false; + + spin_lock(&mcdi->iface_lock); + if (timeout) { + /* Ensure that if the completion event arrives later, + * the seqno check in efx_mcdi_ev_cpl() will fail + */ + ++mcdi->seqno; + ++mcdi->credits; + rc = -ETIMEDOUT; + hdr_len = 0; + data_len = 0; + } else { + rc = mcdi->resprc; + hdr_len = mcdi->resp_hdr_len; + data_len = mcdi->resp_data_len; + } + spin_unlock(&mcdi->iface_lock); + + /* Stop the timer. In case the timer function is running, we + * must wait for it to return so that there is no possibility + * of it aborting the next request. + */ + if (!timeout) + del_timer_sync(&mcdi->async_timer); + + spin_lock(&mcdi->async_lock); + async = list_first_entry(&mcdi->async_list, + struct efx_mcdi_async_param, list); + list_del(&async->list); + spin_unlock(&mcdi->async_lock); + + outbuf = (efx_dword_t *)(async + 1); + efx->type->mcdi_read_response(efx, outbuf, hdr_len, + min(async->outlen, data_len)); + async->complete(efx, async->cookie, rc, outbuf, data_len); + kfree(async); + + efx_mcdi_release(mcdi); + + return true; +} + static void efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno, unsigned int datalen, unsigned int mcdi_err) { @@ -336,8 +472,40 @@ static void efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno, spin_unlock(&mcdi->iface_lock); - if (wake) - efx_mcdi_complete(mcdi); + if (wake) { + if (!efx_mcdi_complete_async(mcdi, false)) + (void) efx_mcdi_complete_sync(mcdi); + + /* If the interface isn't RUNNING_ASYNC or + * RUNNING_SYNC then we've received a duplicate + * completion after we've already transitioned back to + * QUIESCENT. [A subsequent invocation would increment + * seqno, so would have failed the seqno check]. + */ + } +} + +static void efx_mcdi_timeout_async(unsigned long context) +{ + struct efx_mcdi_iface *mcdi = (struct efx_mcdi_iface *)context; + + efx_mcdi_complete_async(mcdi, true); +} + +static int +efx_mcdi_check_supported(struct efx_nic *efx, unsigned int cmd, size_t inlen) +{ + if (efx->type->mcdi_max_ver < 0 || + (efx->type->mcdi_max_ver < 2 && + cmd > MC_CMD_CMD_SPACE_ESCAPE_7)) + return -EINVAL; + + if (inlen > MCDI_CTL_SDU_LEN_MAX_V2 || + (efx->type->mcdi_max_ver < 2 && + inlen > MCDI_CTL_SDU_LEN_MAX_V1)) + return -EMSGSIZE; + + return 0; } int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, @@ -358,27 +526,84 @@ int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, const efx_dword_t *inbuf, size_t inlen) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + int rc; - if (efx->type->mcdi_max_ver < 0 || - (efx->type->mcdi_max_ver < 2 && - cmd > MC_CMD_CMD_SPACE_ESCAPE_7)) - return -EINVAL; + rc = efx_mcdi_check_supported(efx, cmd, inlen); + if (rc) + return rc; - if (inlen > MCDI_CTL_SDU_LEN_MAX_V2 || - (efx->type->mcdi_max_ver < 2 && - inlen > MCDI_CTL_SDU_LEN_MAX_V1)) - return -EMSGSIZE; + efx_mcdi_acquire_sync(mcdi); + efx_mcdi_send_request(efx, cmd, inbuf, inlen); + return 0; +} + +/** + * efx_mcdi_rpc_async - Schedule an MCDI command to run asynchronously + * @efx: NIC through which to issue the command + * @cmd: Command type number + * @inbuf: Command parameters + * @inlen: Length of command parameters, in bytes + * @outlen: Length to allocate for response buffer, in bytes + * @complete: Function to be called on completion or cancellation. + * @cookie: Arbitrary value to be passed to @complete. + * + * This function does not sleep and therefore may be called in atomic + * context. It will fail if event queues are disabled or if MCDI + * event completions have been disabled due to an error. + * + * If it succeeds, the @complete function will be called exactly once + * in atomic context, when one of the following occurs: + * (a) the completion event is received (in NAPI context) + * (b) event queues are disabled (in the process that disables them) + * (c) the request times-out (in timer context) + */ +int +efx_mcdi_rpc_async(struct efx_nic *efx, unsigned int cmd, + const efx_dword_t *inbuf, size_t inlen, size_t outlen, + efx_mcdi_async_completer *complete, unsigned long cookie) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + struct efx_mcdi_async_param *async; + int rc; - efx_mcdi_acquire(mcdi); + rc = efx_mcdi_check_supported(efx, cmd, inlen); + if (rc) + return rc; - /* Serialise with efx_mcdi_ev_cpl() and efx_mcdi_ev_death() */ - spin_lock_bh(&mcdi->iface_lock); - ++mcdi->seqno; - spin_unlock_bh(&mcdi->iface_lock); + async = kmalloc(sizeof(*async) + ALIGN(max(inlen, outlen), 4), + GFP_ATOMIC); + if (!async) + return -ENOMEM; - efx_mcdi_copyin(efx, cmd, inbuf, inlen); - mcdi->new_epoch = false; - return 0; + async->cmd = cmd; + async->inlen = inlen; + async->outlen = outlen; + async->complete = complete; + async->cookie = cookie; + memcpy(async + 1, inbuf, inlen); + + spin_lock_bh(&mcdi->async_lock); + + if (mcdi->mode == MCDI_MODE_EVENTS) { + list_add_tail(&async->list, &mcdi->async_list); + + /* If this is at the front of the queue, try to start it + * immediately + */ + if (mcdi->async_list.next == &async->list && + efx_mcdi_acquire_async(mcdi)) { + efx_mcdi_send_request(efx, cmd, inbuf, inlen); + mod_timer(&mcdi->async_timer, + jiffies + MCDI_RPC_TIMEOUT); + } + } else { + kfree(async); + rc = -ENETDOWN; + } + + spin_unlock_bh(&mcdi->async_lock); + + return rc; } int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, @@ -448,6 +673,10 @@ int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, return rc; } +/* Switch to polled MCDI completions. This can be called in various + * error conditions with various locks held, so it must be lockless. + * Caller is responsible for flushing asynchronous requests later. + */ void efx_mcdi_mode_poll(struct efx_nic *efx) { struct efx_mcdi_iface *mcdi; @@ -465,11 +694,50 @@ void efx_mcdi_mode_poll(struct efx_nic *efx) * efx_mcdi_await_completion() will then call efx_mcdi_poll(). * * We need an smp_wmb() to synchronise with efx_mcdi_await_completion(), - * which efx_mcdi_complete() provides for us. + * which efx_mcdi_complete_sync() provides for us. */ mcdi->mode = MCDI_MODE_POLL; - efx_mcdi_complete(mcdi); + efx_mcdi_complete_sync(mcdi); +} + +/* Flush any running or queued asynchronous requests, after event processing + * is stopped + */ +void efx_mcdi_flush_async(struct efx_nic *efx) +{ + struct efx_mcdi_async_param *async, *next; + struct efx_mcdi_iface *mcdi; + + if (!efx->mcdi) + return; + + mcdi = efx_mcdi(efx); + + /* We must be in polling mode so no more requests can be queued */ + BUG_ON(mcdi->mode != MCDI_MODE_POLL); + + del_timer_sync(&mcdi->async_timer); + + /* If a request is still running, make sure we give the MC + * time to complete it so that the response won't overwrite our + * next request. + */ + if (mcdi->state == MCDI_STATE_RUNNING_ASYNC) { + efx_mcdi_poll(efx); + mcdi->state = MCDI_STATE_QUIESCENT; + } + + /* Nothing else will access the async list now, so it is safe + * to walk it without holding async_lock. If we hold it while + * calling a completer then lockdep may warn that we have + * acquired locks in the wrong order. + */ + list_for_each_entry_safe(async, next, &mcdi->async_list, list) { + async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); + list_del(&async->list); + kfree(async); + } } void efx_mcdi_mode_event(struct efx_nic *efx) @@ -491,7 +759,7 @@ void efx_mcdi_mode_event(struct efx_nic *efx) * write memory barrier ensure that efx_mcdi_rpc() sees it, which * efx_mcdi_acquire() provides. */ - efx_mcdi_acquire(mcdi); + efx_mcdi_acquire_sync(mcdi); mcdi->mode = MCDI_MODE_EVENTS; efx_mcdi_release(mcdi); } @@ -508,16 +776,21 @@ static void efx_mcdi_ev_death(struct efx_nic *efx, int rc) * are sent to the same queue, we can't be racing with * efx_mcdi_ev_cpl()] * - * There's a race here with efx_mcdi_rpc(), because we might receive - * a REBOOT event *before* the request has been copied out. In polled - * mode (during startup) this is irrelevant, because efx_mcdi_complete() - * is ignored. In event mode, this condition is just an edge-case of - * receiving a REBOOT event after posting the MCDI request. Did the mc - * reboot before or after the copyout? The best we can do always is - * just return failure. + * If there is an outstanding asynchronous request, we can't + * complete it now (efx_mcdi_complete() would deadlock). The + * reset process will take care of this. + * + * There's a race here with efx_mcdi_send_request(), because + * we might receive a REBOOT event *before* the request has + * been copied out. In polled mode (during startup) this is + * irrelevant, because efx_mcdi_complete_sync() is ignored. In + * event mode, this condition is just an edge-case of + * receiving a REBOOT event after posting the MCDI + * request. Did the mc reboot before or after the copyout? The + * best we can do always is just return failure. */ spin_lock(&mcdi->iface_lock); - if (efx_mcdi_complete(mcdi)) { + if (efx_mcdi_complete_sync(mcdi)) { if (mcdi->mode == MCDI_MODE_EVENTS) { mcdi->resprc = rc; mcdi->resp_hdr_len = 0; @@ -579,6 +852,7 @@ void efx_mcdi_process_event(struct efx_channel *channel, "MC Scheduler error address=0x%x\n", data); break; case MCDI_EVENT_CODE_REBOOT: + case MCDI_EVENT_CODE_MC_REBOOT: netif_info(efx, hw, efx->net_dev, "MC Reboot\n"); efx_mcdi_ev_death(efx, -EIO); break; @@ -593,7 +867,19 @@ void efx_mcdi_process_event(struct efx_channel *channel, case MCDI_EVENT_CODE_PTP_PPS: efx_ptp_event(efx, event); break; - + case MCDI_EVENT_CODE_TX_FLUSH: + case MCDI_EVENT_CODE_RX_FLUSH: + /* Two flush events will be sent: one to the same event + * queue as completions, and one to event queue 0. + * In the latter case the {RX,TX}_FLUSH_TO_DRIVER + * flag will be set, and we should ignore the event + * because we want to wait for all completions. + */ + BUILD_BUG_ON(MCDI_EVENT_TX_FLUSH_TO_DRIVER_LBN != + MCDI_EVENT_RX_FLUSH_TO_DRIVER_LBN); + if (!MCDI_EVENT_FIELD(*event, TX_FLUSH_TO_DRIVER)) + efx_ef10_handle_drain_event(efx); + break; case MCDI_EVENT_CODE_TX_ERR: case MCDI_EVENT_CODE_RX_ERR: netif_err(efx, hw, efx->net_dev, @@ -617,27 +903,55 @@ void efx_mcdi_process_event(struct efx_channel *channel, void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len) { - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_VERSION_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, + max(MC_CMD_GET_VERSION_OUT_LEN, + MC_CMD_GET_CAPABILITIES_OUT_LEN)); size_t outlength; const __le16 *ver_words; + size_t offset; int rc; BUILD_BUG_ON(MC_CMD_GET_VERSION_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_VERSION, NULL, 0, outbuf, sizeof(outbuf), &outlength); if (rc) goto fail; - if (outlength < MC_CMD_GET_VERSION_OUT_LEN) { rc = -EIO; goto fail; } ver_words = (__le16 *)MCDI_PTR(outbuf, GET_VERSION_OUT_VERSION); - snprintf(buf, len, "%u.%u.%u.%u", - le16_to_cpu(ver_words[0]), le16_to_cpu(ver_words[1]), - le16_to_cpu(ver_words[2]), le16_to_cpu(ver_words[3])); + offset = snprintf(buf, len, "%u.%u.%u.%u", + le16_to_cpu(ver_words[0]), le16_to_cpu(ver_words[1]), + le16_to_cpu(ver_words[2]), le16_to_cpu(ver_words[3])); + + /* EF10 may have multiple datapath firmware variants within a + * single version. Report which variants are running. + */ + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) { + BUILD_BUG_ON(MC_CMD_GET_CAPABILITIES_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_CAPABILITIES, NULL, 0, + outbuf, sizeof(outbuf), &outlength); + if (rc || outlength < MC_CMD_GET_CAPABILITIES_OUT_LEN) + offset += snprintf( + buf + offset, len - offset, " rx? tx?"); + else + offset += snprintf( + buf + offset, len - offset, " rx%x tx%x", + MCDI_WORD(outbuf, + GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID), + MCDI_WORD(outbuf, + GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID)); + + /* It's theoretically possible for the string to exceed 31 + * characters, though in practice the first three version + * components are short enough that this doesn't happen. + */ + if (WARN_ON(offset >= len)) + buf[0] = 0; + } + return; fail: @@ -645,8 +959,8 @@ fail: buf[0] = 0; } -int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, - bool *was_attached) +static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, + bool *was_attached) { MCDI_DECLARE_BUF(inbuf, MC_CMD_DRV_ATTACH_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_DRV_ATTACH_OUT_LEN); @@ -1157,6 +1471,17 @@ fail: return rc; } +int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_WORKAROUND_IN_LEN); + + BUILD_BUG_ON(MC_CMD_WORKAROUND_OUT_LEN != 0); + MCDI_SET_DWORD(inbuf, WORKAROUND_IN_TYPE, type); + MCDI_SET_DWORD(inbuf, WORKAROUND_IN_ENABLED, enabled); + return efx_mcdi_rpc(efx, MC_CMD_WORKAROUND, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + #ifdef CONFIG_SFC_MTD #define EFX_MCDI_NVRAM_LEN_MAX 128 diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 303d9e88a27f..c34d0d4e10ee 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2008-2010 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2008-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -14,15 +14,17 @@ * enum efx_mcdi_state - MCDI request handling state * @MCDI_STATE_QUIESCENT: No pending MCDI requests. If the caller holds the * mcdi @iface_lock then they are able to move to %MCDI_STATE_RUNNING - * @MCDI_STATE_RUNNING: There is an MCDI request pending. Only the thread that - * moved into this state is allowed to move out of it. + * @MCDI_STATE_RUNNING_SYNC: There is a synchronous MCDI request pending. + * Only the thread that moved into this state is allowed to move out of it. + * @MCDI_STATE_RUNNING_ASYNC: There is an asynchronous MCDI request pending. * @MCDI_STATE_COMPLETED: An MCDI request has completed, but the owning thread * has not yet consumed the result. For all other threads, equivalent to * %MCDI_STATE_RUNNING. */ enum efx_mcdi_state { MCDI_STATE_QUIESCENT, - MCDI_STATE_RUNNING, + MCDI_STATE_RUNNING_SYNC, + MCDI_STATE_RUNNING_ASYNC, MCDI_STATE_COMPLETED, }; @@ -33,20 +35,26 @@ enum efx_mcdi_mode { /** * struct efx_mcdi_iface - MCDI protocol context + * @efx: The associated NIC. * @state: Request handling state. Waited for by @wq. * @mode: Poll for mcdi completion, or wait for an mcdi_event. * @wq: Wait queue for threads waiting for @state != %MCDI_STATE_RUNNING * @new_epoch: Indicates start of day or start of MC reboot recovery - * @iface_lock: Serialises access to all the following fields + * @iface_lock: Serialises access to @seqno, @credits and response metadata * @seqno: The next sequence number to use for mcdi requests. * @credits: Number of spurious MCDI completion events allowed before we * trigger a fatal error * @resprc: Response error/success code (Linux numbering) * @resp_hdr_len: Response header length * @resp_data_len: Response data (SDU or error) length + * @async_lock: Serialises access to @async_list while event processing is + * enabled + * @async_list: Queue of asynchronous requests + * @async_timer: Timer for asynchronous request timeout */ struct efx_mcdi_iface { - atomic_t state; + struct efx_nic *efx; + enum efx_mcdi_state state; enum efx_mcdi_mode mode; wait_queue_head_t wq; spinlock_t iface_lock; @@ -56,6 +64,9 @@ struct efx_mcdi_iface { int resprc; size_t resp_hdr_len; size_t resp_data_len; + spinlock_t async_lock; + struct list_head async_list; + struct timer_list async_timer; }; struct efx_mcdi_mon { @@ -70,7 +81,7 @@ struct efx_mcdi_mon { struct efx_mcdi_mtd_partition { struct efx_mtd_partition common; bool updating; - u8 nvram_type; + u16 nvram_type; u16 fw_subtype; }; @@ -111,10 +122,20 @@ extern int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, efx_dword_t *outbuf, size_t outlen, size_t *outlen_actual); +typedef void efx_mcdi_async_completer(struct efx_nic *efx, + unsigned long cookie, int rc, + efx_dword_t *outbuf, + size_t outlen_actual); +extern int efx_mcdi_rpc_async(struct efx_nic *efx, unsigned int cmd, + const efx_dword_t *inbuf, size_t inlen, + size_t outlen, + efx_mcdi_async_completer *complete, + unsigned long cookie); extern int efx_mcdi_poll_reboot(struct efx_nic *efx); extern void efx_mcdi_mode_poll(struct efx_nic *efx); extern void efx_mcdi_mode_event(struct efx_nic *efx); +extern void efx_mcdi_flush_async(struct efx_nic *efx); extern void efx_mcdi_process_event(struct efx_channel *channel, efx_qword_t *event); @@ -136,6 +157,9 @@ extern void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev); #define _MCDI_DWORD(_buf, _field) \ ((_buf) + (_MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _OFST, 4) >> 2)) +#define MCDI_WORD(_buf, _field) \ + ((u16)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 2) + \ + le16_to_cpu(*(__force const __le16 *)MCDI_PTR(_buf, _field))) #define MCDI_SET_DWORD(_buf, _field, _value) \ EFX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), EFX_DWORD_0, _value) #define MCDI_DWORD(_buf, _field) \ @@ -252,8 +276,6 @@ extern void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev); EFX_QWORD_FIELD(_ev, MCDI_EVENT_ ## _field) extern void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len); -extern int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, - bool *was_attached_out); extern int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address, u16 *fw_subtype_list, u32 *capabilities); extern int efx_mcdi_log_ctrl(struct efx_nic *efx, bool evq, bool uart, @@ -274,6 +296,8 @@ extern int efx_mcdi_flush_rxqs(struct efx_nic *efx); extern int efx_mcdi_port_probe(struct efx_nic *efx); extern void efx_mcdi_port_remove(struct efx_nic *efx); extern int efx_mcdi_port_reconfigure(struct efx_nic *efx); +extern int efx_mcdi_port_get_number(struct efx_nic *efx); +extern u32 efx_mcdi_phy_get_caps(struct efx_nic *efx); extern void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); extern int efx_mcdi_set_mac(struct efx_nic *efx); #define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1)) @@ -282,6 +306,7 @@ extern void efx_mcdi_mac_stop_stats(struct efx_nic *efx); extern bool efx_mcdi_mac_check_fault(struct efx_nic *efx); extern enum reset_type efx_mcdi_map_reset_reason(enum reset_type reason); extern int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method); +extern int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled); #ifdef CONFIG_SFC_MCDI_MON extern int efx_mcdi_mon_probe(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/mcdi_mon.c b/drivers/net/ethernet/sfc/mcdi_mon.c index d7d45662d684..4cc5d95b2a5a 100644 --- a/drivers/net/ethernet/sfc/mcdi_mon.c +++ b/drivers/net/ethernet/sfc/mcdi_mon.c @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2011 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2011-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -21,7 +21,9 @@ enum efx_hwmon_type { EFX_HWMON_UNKNOWN, EFX_HWMON_TEMP, /* temperature */ EFX_HWMON_COOL, /* cooling device, probably a heatsink */ - EFX_HWMON_IN /* input voltage */ + EFX_HWMON_IN, /* voltage */ + EFX_HWMON_CURR, /* current */ + EFX_HWMON_POWER, /* power */ }; static const struct { @@ -29,23 +31,52 @@ static const struct { enum efx_hwmon_type hwmon_type; int port; } efx_mcdi_sensor_type[] = { -#define SENSOR(name, label, hwmon_type, port) \ - [MC_CMD_SENSOR_##name] = { label, hwmon_type, port } - SENSOR(CONTROLLER_TEMP, "Controller temp.", EFX_HWMON_TEMP, -1), - SENSOR(PHY_COMMON_TEMP, "PHY temp.", EFX_HWMON_TEMP, -1), - SENSOR(CONTROLLER_COOLING, "Controller cooling", EFX_HWMON_COOL, -1), - SENSOR(PHY0_TEMP, "PHY temp.", EFX_HWMON_TEMP, 0), - SENSOR(PHY0_COOLING, "PHY cooling", EFX_HWMON_COOL, 0), - SENSOR(PHY1_TEMP, "PHY temp.", EFX_HWMON_TEMP, 1), - SENSOR(PHY1_COOLING, "PHY cooling", EFX_HWMON_COOL, 1), - SENSOR(IN_1V0, "1.0V supply", EFX_HWMON_IN, -1), - SENSOR(IN_1V2, "1.2V supply", EFX_HWMON_IN, -1), - SENSOR(IN_1V8, "1.8V supply", EFX_HWMON_IN, -1), - SENSOR(IN_2V5, "2.5V supply", EFX_HWMON_IN, -1), - SENSOR(IN_3V3, "3.3V supply", EFX_HWMON_IN, -1), - SENSOR(IN_12V0, "12.0V supply", EFX_HWMON_IN, -1), - SENSOR(IN_1V2A, "1.2V analogue supply", EFX_HWMON_IN, -1), - SENSOR(IN_VREF, "ref. voltage", EFX_HWMON_IN, -1), +#define SENSOR(name, label, hwmon_type, port) \ + [MC_CMD_SENSOR_##name] = { label, EFX_HWMON_ ## hwmon_type, port } + SENSOR(CONTROLLER_TEMP, "Controller ext. temp.", TEMP, -1), + SENSOR(PHY_COMMON_TEMP, "PHY temp.", TEMP, -1), + SENSOR(CONTROLLER_COOLING, "Controller cooling", COOL, -1), + SENSOR(PHY0_TEMP, "PHY temp.", TEMP, 0), + SENSOR(PHY0_COOLING, "PHY cooling", COOL, 0), + SENSOR(PHY1_TEMP, "PHY temp.", TEMP, 1), + SENSOR(PHY1_COOLING, "PHY cooling", COOL, 1), + SENSOR(IN_1V0, "1.0V supply", IN, -1), + SENSOR(IN_1V2, "1.2V supply", IN, -1), + SENSOR(IN_1V8, "1.8V supply", IN, -1), + SENSOR(IN_2V5, "2.5V supply", IN, -1), + SENSOR(IN_3V3, "3.3V supply", IN, -1), + SENSOR(IN_12V0, "12.0V supply", IN, -1), + SENSOR(IN_1V2A, "1.2V analogue supply", IN, -1), + SENSOR(IN_VREF, "ref. voltage", IN, -1), + SENSOR(OUT_VAOE, "AOE power supply", IN, -1), + SENSOR(AOE_TEMP, "AOE temp.", TEMP, -1), + SENSOR(PSU_AOE_TEMP, "AOE PSU temp.", TEMP, -1), + SENSOR(PSU_TEMP, "Controller PSU temp.", TEMP, -1), + SENSOR(FAN_0, NULL, COOL, -1), + SENSOR(FAN_1, NULL, COOL, -1), + SENSOR(FAN_2, NULL, COOL, -1), + SENSOR(FAN_3, NULL, COOL, -1), + SENSOR(FAN_4, NULL, COOL, -1), + SENSOR(IN_VAOE, "AOE input supply", IN, -1), + SENSOR(OUT_IAOE, "AOE output current", CURR, -1), + SENSOR(IN_IAOE, "AOE input current", CURR, -1), + SENSOR(NIC_POWER, "Board power use", POWER, -1), + SENSOR(IN_0V9, "0.9V supply", IN, -1), + SENSOR(IN_I0V9, "0.9V input current", CURR, -1), + SENSOR(IN_I1V2, "1.2V input current", CURR, -1), + SENSOR(IN_0V9_ADC, "0.9V supply (at ADC)", IN, -1), + SENSOR(CONTROLLER_2_TEMP, "Controller ext. temp. 2", TEMP, -1), + SENSOR(VREG_INTERNAL_TEMP, "Voltage regulator temp.", TEMP, -1), + SENSOR(VREG_0V9_TEMP, "0.9V regulator temp.", TEMP, -1), + SENSOR(VREG_1V2_TEMP, "1.2V regulator temp.", TEMP, -1), + SENSOR(CONTROLLER_VPTAT, "Controller int. temp. raw", IN, -1), + SENSOR(CONTROLLER_INTERNAL_TEMP, "Controller int. temp.", TEMP, -1), + SENSOR(CONTROLLER_VPTAT_EXTADC, + "Controller int. temp. raw (at ADC)", IN, -1), + SENSOR(CONTROLLER_INTERNAL_TEMP_EXTADC, + "Controller int. temp. (via ADC)", TEMP, -1), + SENSOR(AMBIENT_TEMP, "Ambient temp.", TEMP, -1), + SENSOR(AIRFLOW, "Air flow raw", IN, -1), #undef SENSOR }; @@ -160,9 +191,19 @@ static ssize_t efx_mcdi_mon_show_value(struct device *dev, value = EFX_DWORD_FIELD(entry, MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_VALUE); - /* Convert temperature from degrees to milli-degrees Celsius */ - if (mon_attr->hwmon_type == EFX_HWMON_TEMP) + switch (mon_attr->hwmon_type) { + case EFX_HWMON_TEMP: + /* Convert temperature from degrees to milli-degrees Celsius */ value *= 1000; + break; + case EFX_HWMON_POWER: + /* Convert power from watts to microwatts */ + value *= 1000000; + break; + default: + /* No conversion needed */ + break; + } return sprintf(buf, "%u\n", value); } @@ -177,9 +218,19 @@ static ssize_t efx_mcdi_mon_show_limit(struct device *dev, value = mon_attr->limit_value; - /* Convert temperature from degrees to milli-degrees Celsius */ - if (mon_attr->hwmon_type == EFX_HWMON_TEMP) + switch (mon_attr->hwmon_type) { + case EFX_HWMON_TEMP: + /* Convert temperature from degrees to milli-degrees Celsius */ value *= 1000; + break; + case EFX_HWMON_POWER: + /* Convert power from watts to microwatts */ + value *= 1000000; + break; + default: + /* No conversion needed */ + break; + } return sprintf(buf, "%u\n", value); } @@ -243,8 +294,8 @@ efx_mcdi_mon_add_attr(struct efx_nic *efx, const char *name, int efx_mcdi_mon_probe(struct efx_nic *efx) { + unsigned int n_temp = 0, n_cool = 0, n_in = 0, n_curr = 0, n_power = 0; struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx); - unsigned int n_temp = 0, n_cool = 0, n_in = 0; MCDI_DECLARE_BUF(inbuf, MC_CMD_SENSOR_INFO_EXT_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_SENSOR_INFO_OUT_LENMAX); unsigned int n_pages, n_sensors, n_attrs, page; @@ -380,6 +431,14 @@ int efx_mcdi_mon_probe(struct efx_nic *efx) hwmon_prefix = "in"; hwmon_index = n_in++; /* 0-based */ break; + case EFX_HWMON_CURR: + hwmon_prefix = "curr"; + hwmon_index = ++n_curr; /* 1-based */ + break; + case EFX_HWMON_POWER: + hwmon_prefix = "power"; + hwmon_index = ++n_power; /* 1-based */ + break; } min1 = MCDI_ARRAY_FIELD(outbuf, SENSOR_ENTRY, @@ -399,13 +458,15 @@ int efx_mcdi_mon_probe(struct efx_nic *efx) if (rc) goto fail; - snprintf(name, sizeof(name), "%s%u_min", - hwmon_prefix, hwmon_index); - rc = efx_mcdi_mon_add_attr( - efx, name, efx_mcdi_mon_show_limit, - i, type, min1); - if (rc) - goto fail; + if (hwmon_type != EFX_HWMON_POWER) { + snprintf(name, sizeof(name), "%s%u_min", + hwmon_prefix, hwmon_index); + rc = efx_mcdi_mon_add_attr( + efx, name, efx_mcdi_mon_show_limit, + i, type, min1); + if (rc) + goto fail; + } snprintf(name, sizeof(name), "%s%u_max", hwmon_prefix, hwmon_index); diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index 9e824f74e8a1..b5cf62492f8e 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -3799,6 +3799,10 @@ #define NVRAM_PARTITION_TYPE_DUMP 0x800 /* enum: Application license key storage partition */ #define NVRAM_PARTITION_TYPE_LICENSE 0x900 +/* enum: Start of range used for PHY partitions (low 8 bits are the PHY ID) */ +#define NVRAM_PARTITION_TYPE_PHY_MIN 0xa00 +/* enum: End of range used for PHY partitions (low 8 bits are the PHY ID) */ +#define NVRAM_PARTITION_TYPE_PHY_MAX 0xaff /* enum: Start of reserved value range (firmware may use for any purpose) */ #define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00 /* enum: End of reserved value range (firmware may use for any purpose) */ diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index 42d52f34ad79..8d33da6697fb 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2009-2010 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2009-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -830,6 +830,13 @@ static const struct efx_phy_operations efx_mcdi_phy_ops = { .get_module_info = efx_mcdi_phy_get_module_info, }; +u32 efx_mcdi_phy_get_caps(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data = efx->phy_data; + + return phy_data->supported_cap; +} + static unsigned int efx_mcdi_event_link_speed[] = { [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100, [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000, @@ -1004,3 +1011,17 @@ void efx_mcdi_port_remove(struct efx_nic *efx) efx->phy_op->remove(efx); efx_nic_free_buffer(efx, &efx->stats_buffer); } + +/* Get physical port number (EF10 only; on Siena it is same as PF number) */ +int efx_mcdi_port_get_number(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN); + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + return rc; + + return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT); +} diff --git a/drivers/net/ethernet/sfc/mdio_10g.c b/drivers/net/ethernet/sfc/mdio_10g.c index 9acfd6696ffb..8ff954c59efa 100644 --- a/drivers/net/ethernet/sfc/mdio_10g.c +++ b/drivers/net/ethernet/sfc/mdio_10g.c @@ -1,5 +1,5 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2006-2011 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/net/ethernet/sfc/mdio_10g.h b/drivers/net/ethernet/sfc/mdio_10g.h index a97dbbd2de99..16824fecc5ee 100644 --- a/drivers/net/ethernet/sfc/mdio_10g.h +++ b/drivers/net/ethernet/sfc/mdio_10g.h @@ -1,5 +1,5 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2006-2011 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/net/ethernet/sfc/mtd.c b/drivers/net/ethernet/sfc/mtd.c index 8be9a69a61e1..a77a8bd2dd70 100644 --- a/drivers/net/ethernet/sfc/mtd.c +++ b/drivers/net/ethernet/sfc/mtd.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index e85bed09b5e9..b172ed133055 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2005-2011 Solarflare Communications Inc. + * Copyright 2005-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -39,7 +39,7 @@ * **************************************************************************/ -#define EFX_DRIVER_VERSION "3.2" +#define EFX_DRIVER_VERSION "4.0" #ifdef DEBUG #define EFX_BUG_ON_PARANOID(x) BUG_ON(x) @@ -135,6 +135,7 @@ struct efx_special_buffer { * freed when descriptor completes * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be * freed when descriptor completes. + * @option: When @flags & %EFX_TX_BUF_OPTION, a NIC-specific option descriptor. * @dma_addr: DMA address of the fragment. * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. @@ -146,7 +147,10 @@ struct efx_tx_buffer { const struct sk_buff *skb; void *heap_buf; }; - dma_addr_t dma_addr; + union { + efx_qword_t option; + dma_addr_t dma_addr; + }; unsigned short flags; unsigned short len; unsigned short unmap_len; @@ -155,6 +159,7 @@ struct efx_tx_buffer { #define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ #define EFX_TX_BUF_HEAP 4 /* buffer was allocated with kmalloc() */ #define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */ +#define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */ /** * struct efx_tx_queue - An Efx TX queue @@ -297,7 +302,8 @@ struct efx_rx_page_state { * @added_count: Number of buffers added to the receive queue. * @notified_count: Number of buffers given to NIC (<= @added_count). * @removed_count: Number of buffers removed from the receive queue. - * @scatter_n: Number of buffers used by current packet + * @scatter_n: Used by NIC specific receive code. + * @scatter_len: Used by NIC specific receive code. * @page_ring: The ring to store DMA mapped pages for reuse. * @page_add: Counter to calculate the write pointer for the recycle ring. * @page_remove: Counter to calculate the read pointer for the recycle ring. @@ -329,6 +335,7 @@ struct efx_rx_queue { unsigned int notified_count; unsigned int removed_count; unsigned int scatter_n; + unsigned int scatter_len; struct page **page_ring; unsigned int page_add; unsigned int page_remove; @@ -382,6 +389,8 @@ enum efx_rx_alloc_method { * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to * lack of descriptors + * @n_rx_merge_events: Number of RX merged completion events + * @n_rx_merge_packets: Number of RX packets completed by merged events * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by * __efx_rx_packet(), or zero if there is none * @rx_pkt_index: Ring index of first buffer for next packet to be delivered @@ -418,6 +427,8 @@ struct efx_channel { unsigned n_rx_overlength; unsigned n_skbuff_leaks; unsigned int n_rx_nodesc_trunc; + unsigned int n_rx_merge_events; + unsigned int n_rx_merge_packets; unsigned int rx_pkt_n_frags; unsigned int rx_pkt_index; @@ -721,7 +732,7 @@ struct vfdi_status; * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, * indexed by filter ID * @rps_expire_index: Next index to check for expiry in @rps_flow_id - * @drain_pending: Count of RX and TX queues that haven't been flushed and drained. + * @active_queues: Count of RX and TX queues that haven't been flushed and drained. * @rxq_flush_pending: Count of number of receive queues that need to be flushed. * Decremented when the efx_flush_rx_queue() is called. * @rxq_flush_outstanding: Count of number of RX flushes started but not yet @@ -862,7 +873,7 @@ struct efx_nic { unsigned int rps_expire_index; #endif - atomic_t drain_pending; + atomic_t active_queues; atomic_t rxq_flush_pending; atomic_t rxq_flush_outstanding; wait_queue_head_t flush_wq; @@ -1023,7 +1034,8 @@ struct efx_mtd_partition { * @rx_prefix_size: Size of RX prefix before packet data * @rx_hash_offset: Offset of RX flow hash within prefix * @rx_buffer_padding: Size of padding at end of RX packet - * @can_rx_scatter: NIC is able to scatter packet to multiple buffers + * @can_rx_scatter: NIC is able to scatter packets to multiple buffers + * @always_rx_scatter: NIC will always scatter packets to multiple buffers * @max_interrupt_mode: Highest capability interrupt mode supported * from &enum efx_init_mode. * @timer_period_max: Maximum period of interrupt timer (in ticks) @@ -1036,7 +1048,7 @@ struct efx_nic_type { int (*probe)(struct efx_nic *efx); void (*remove)(struct efx_nic *efx); int (*init)(struct efx_nic *efx); - void (*dimension_resources)(struct efx_nic *efx); + int (*dimension_resources)(struct efx_nic *efx); void (*fini)(struct efx_nic *efx); void (*monitor)(struct efx_nic *efx); enum reset_type (*map_reset_reason)(enum reset_type reason); @@ -1087,7 +1099,7 @@ struct efx_nic_type { void (*rx_write)(struct efx_rx_queue *rx_queue); void (*rx_defer_refill)(struct efx_rx_queue *rx_queue); int (*ev_probe)(struct efx_channel *channel); - void (*ev_init)(struct efx_channel *channel); + int (*ev_init)(struct efx_channel *channel); void (*ev_fini)(struct efx_channel *channel); void (*ev_remove)(struct efx_channel *channel); int (*ev_process)(struct efx_channel *channel, int quota); @@ -1142,6 +1154,7 @@ struct efx_nic_type { unsigned int rx_hash_offset; unsigned int rx_buffer_padding; bool can_rx_scatter; + bool always_rx_scatter; unsigned int max_interrupt_mode; unsigned int timer_period_max; netdev_features_t offload_features; diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index b9b127743a07..e7dbd2dd202e 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2011 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -33,9 +33,8 @@ int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer, unsigned int len, gfp_t gfp_flags) { - buffer->addr = dma_alloc_coherent(&efx->pci_dev->dev, len, - &buffer->dma_addr, - gfp_flags | __GFP_ZERO); + buffer->addr = dma_zalloc_coherent(&efx->pci_dev->dev, len, + &buffer->dma_addr, gfp_flags); if (!buffer->addr) return -ENOMEM; buffer->len = len; diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 9afbf3616b4b..4b1e188f7a2f 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2011 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -17,15 +17,12 @@ #include "efx.h" #include "mcdi.h" -/* - * Falcon hardware control - */ - enum { EFX_REV_FALCON_A0 = 0, EFX_REV_FALCON_A1 = 1, EFX_REV_FALCON_B0 = 2, EFX_REV_SIENA_A0 = 3, + EFX_REV_HUNT_A0 = 4, }; static inline int efx_nic_rev(struct efx_nic *efx) @@ -347,6 +344,78 @@ struct siena_nic_data { u64 stats[SIENA_STAT_COUNT]; }; +enum { + EF10_STAT_tx_bytes, + EF10_STAT_tx_packets, + EF10_STAT_tx_pause, + EF10_STAT_tx_control, + EF10_STAT_tx_unicast, + EF10_STAT_tx_multicast, + EF10_STAT_tx_broadcast, + EF10_STAT_tx_lt64, + EF10_STAT_tx_64, + EF10_STAT_tx_65_to_127, + EF10_STAT_tx_128_to_255, + EF10_STAT_tx_256_to_511, + EF10_STAT_tx_512_to_1023, + EF10_STAT_tx_1024_to_15xx, + EF10_STAT_tx_15xx_to_jumbo, + EF10_STAT_rx_bytes, + EF10_STAT_rx_bytes_minus_good_bytes, + EF10_STAT_rx_good_bytes, + EF10_STAT_rx_bad_bytes, + EF10_STAT_rx_packets, + EF10_STAT_rx_good, + EF10_STAT_rx_bad, + EF10_STAT_rx_pause, + EF10_STAT_rx_control, + EF10_STAT_rx_unicast, + EF10_STAT_rx_multicast, + EF10_STAT_rx_broadcast, + EF10_STAT_rx_lt64, + EF10_STAT_rx_64, + EF10_STAT_rx_65_to_127, + EF10_STAT_rx_128_to_255, + EF10_STAT_rx_256_to_511, + EF10_STAT_rx_512_to_1023, + EF10_STAT_rx_1024_to_15xx, + EF10_STAT_rx_15xx_to_jumbo, + EF10_STAT_rx_gtjumbo, + EF10_STAT_rx_bad_gtjumbo, + EF10_STAT_rx_overflow, + EF10_STAT_rx_align_error, + EF10_STAT_rx_length_error, + EF10_STAT_rx_nodesc_drops, + EF10_STAT_COUNT +}; + +/** + * struct efx_ef10_nic_data - EF10 architecture NIC state + * @mcdi_buf: DMA buffer for MCDI + * @warm_boot_count: Last seen MC warm boot count + * @vi_base: Absolute index of first VI in this function + * @n_allocated_vis: Number of VIs allocated to this function + * @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot + * @must_restore_filters: Flag: filters have yet to be restored after MC reboot + * @rx_rss_context: Firmware handle for our RSS context + * @stats: Hardware statistics + * @workaround_35388: Flag: firmware supports workaround for bug 35388 + * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of + * %MC_CMD_GET_CAPABILITIES response) + */ +struct efx_ef10_nic_data { + struct efx_buffer mcdi_buf; + u16 warm_boot_count; + unsigned int vi_base; + unsigned int n_allocated_vis; + bool must_realloc_vis; + bool must_restore_filters; + u32 rx_rss_context; + u64 stats[EF10_STAT_COUNT]; + bool workaround_35388; + u32 datapath_caps; +}; + /* * On the SFC9000 family each port is associated with 1 PCI physical * function (PF) handled by sfc and a configurable number of virtual @@ -448,6 +517,7 @@ extern void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev); extern const struct efx_nic_type falcon_a1_nic_type; extern const struct efx_nic_type falcon_b0_nic_type; extern const struct efx_nic_type siena_a0_nic_type; +extern const struct efx_nic_type efx_hunt_a0_nic_type; /************************************************************************** * @@ -503,9 +573,9 @@ static inline int efx_nic_probe_eventq(struct efx_channel *channel) { return channel->efx->type->ev_probe(channel); } -static inline void efx_nic_init_eventq(struct efx_channel *channel) +static inline int efx_nic_init_eventq(struct efx_channel *channel) { - channel->efx->type->ev_init(channel); + return channel->efx->type->ev_init(channel); } static inline void efx_nic_fini_eventq(struct efx_channel *channel) { @@ -539,7 +609,7 @@ extern void efx_farch_rx_remove(struct efx_rx_queue *rx_queue); extern void efx_farch_rx_write(struct efx_rx_queue *rx_queue); extern void efx_farch_rx_defer_refill(struct efx_rx_queue *rx_queue); extern int efx_farch_ev_probe(struct efx_channel *channel); -extern void efx_farch_ev_init(struct efx_channel *channel); +extern int efx_farch_ev_init(struct efx_channel *channel); extern void efx_farch_ev_fini(struct efx_channel *channel); extern void efx_farch_ev_remove(struct efx_channel *channel); extern int efx_farch_ev_process(struct efx_channel *channel, int quota); @@ -627,6 +697,7 @@ extern void falcon_stop_nic_stats(struct efx_nic *efx); extern int falcon_reset_xaui(struct efx_nic *efx); extern void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw); extern void efx_farch_init_common(struct efx_nic *efx); +extern void efx_ef10_handle_drain_event(struct efx_nic *efx); static inline void efx_nic_push_rx_indir_table(struct efx_nic *efx) { efx->type->rx_push_indir_table(efx); diff --git a/drivers/net/ethernet/sfc/phy.h b/drivers/net/ethernet/sfc/phy.h index 4f6eb8177a6d..45eeb7075156 100644 --- a/drivers/net/ethernet/sfc/phy.h +++ b/drivers/net/ethernet/sfc/phy.h @@ -1,5 +1,5 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2007-2010 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index c60cabb6ff05..03acf57df045 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2011 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2011-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/qt202x_phy.c b/drivers/net/ethernet/sfc/qt202x_phy.c index 326a28637f3c..efa3612affca 100644 --- a/drivers/net/ethernet/sfc/qt202x_phy.c +++ b/drivers/net/ethernet/sfc/qt202x_phy.c @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2006-2010 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2006-2012 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 864b6ffc9cb2..4a596725023f 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2005-2011 Solarflare Communications Inc. + * Copyright 2005-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -529,8 +529,8 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, if (!(flags & EFX_RX_PKT_PREFIX_LEN)) efx_rx_packet__check_len(rx_queue, rx_buf, len); } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) || - unlikely(len <= (n_frags - 1) * EFX_RX_USR_BUF_SIZE) || - unlikely(len > n_frags * EFX_RX_USR_BUF_SIZE) || + unlikely(len <= (n_frags - 1) * efx->rx_dma_len) || + unlikely(len > n_frags * efx->rx_dma_len) || unlikely(!efx->rx_scatter)) { /* If this isn't an explicit discard request, either * the hardware or the driver is broken. @@ -581,9 +581,9 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, rx_buf = efx_rx_buf_next(rx_queue, rx_buf); if (--tail_frags == 0) break; - efx_sync_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE); + efx_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len); } - rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE; + rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len; efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); } @@ -903,3 +903,37 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) } #endif /* CONFIG_RFS_ACCEL */ + +/** + * efx_filter_is_mc_recipient - test whether spec is a multicast recipient + * @spec: Specification to test + * + * Return: %true if the specification is a non-drop RX filter that + * matches a local MAC address I/G bit value of 1 or matches a local + * IPv4 or IPv6 address value in the respective multicast address + * range. Otherwise %false. + */ +bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) +{ + if (!(spec->flags & EFX_FILTER_FLAG_RX) || + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) + return false; + + if (spec->match_flags & + (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && + is_multicast_ether_addr(spec->loc_mac)) + return true; + + if ((spec->match_flags & + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { + if (spec->ether_type == htons(ETH_P_IP) && + ipv4_is_multicast(spec->loc_host[0])) + return true; + if (spec->ether_type == htons(ETH_P_IPV6) && + ((const u8 *)spec->loc_host)[0] == 0xff) + return true; + } + + return false; +} diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 716cff9d0160..144bbff5a4ae 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2012 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h index aed24b736059..87698ae0bf75 100644 --- a/drivers/net/ethernet/sfc/selftest.h +++ b/drivers/net/ethernet/sfc/selftest.h @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2012 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 89180d475367..d034bcd124ef 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -177,13 +177,14 @@ static int siena_probe_nvconfig(struct efx_nic *efx) return rc; } -static void siena_dimension_resources(struct efx_nic *efx) +static int siena_dimension_resources(struct efx_nic *efx) { /* Each port has a small block of internal SRAM dedicated to * the buffer table and descriptor caches. In theory we can * map both blocks to one port, but we don't. */ efx_farch_dimension_resources(efx, FR_CZ_BUF_FULL_TBL_ROWS / 2); + return 0; } static unsigned int siena_mem_map_size(struct efx_nic *efx) @@ -195,7 +196,6 @@ static unsigned int siena_mem_map_size(struct efx_nic *efx) static int siena_probe_nic(struct efx_nic *efx) { struct siena_nic_data *nic_data; - bool already_attached = false; efx_oword_t reg; int rc; @@ -221,19 +221,6 @@ static int siena_probe_nic(struct efx_nic *efx) if (rc) goto fail1; - /* Let the BMC know that the driver is now in charge of link and - * filter settings. We must do this before we reset the NIC */ - rc = efx_mcdi_drv_attach(efx, true, &already_attached); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "Unable to register driver with MCPU\n"); - goto fail2; - } - if (already_attached) - /* Not a fatal error */ - netif_err(efx, probe, efx->net_dev, - "Host already registered with MCPU\n"); - /* Now we can reset the NIC */ rc = efx_mcdi_reset(efx, RESET_TYPE_ALL); if (rc) { @@ -280,8 +267,6 @@ fail5: efx_nic_free_buffer(efx, &efx->irq_status); fail4: fail3: - efx_mcdi_drv_attach(efx, false, NULL); -fail2: efx_mcdi_fini(efx); fail1: kfree(efx->nic_data); @@ -370,14 +355,11 @@ static void siena_remove_nic(struct efx_nic *efx) efx_mcdi_reset(efx, RESET_TYPE_ALL); - /* Relinquish the device back to the BMC */ - efx_mcdi_drv_attach(efx, false, NULL); + efx_mcdi_fini(efx); /* Tear down the private nic state */ kfree(efx->nic_data); efx->nic_data = NULL; - - efx_mcdi_fini(efx); } #define SIENA_DMA_STAT(ext_name, mcdi_name) \ diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index 4b8eef962faa..0c38f926871e 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2010-2011 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2010-2012 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/net/ethernet/sfc/tenxpress.c b/drivers/net/ethernet/sfc/tenxpress.c index d37cb5017129..2c90e6b31575 100644 --- a/drivers/net/ethernet/sfc/tenxpress.c +++ b/drivers/net/ethernet/sfc/tenxpress.c @@ -1,5 +1,5 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2007-2011 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 85ee647b28ad..2ac91c5b5eea 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2005-2010 Solarflare Communications Inc. + * Copyright 2005-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -306,7 +306,9 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, while (read_ptr != stop_index) { struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; - if (unlikely(buffer->len == 0)) { + + if (!(buffer->flags & EFX_TX_BUF_OPTION) && + unlikely(buffer->len == 0)) { netif_err(efx, tx_err, efx->net_dev, "TX queue %d spurious TX completion id %x\n", tx_queue->queue, read_ptr); diff --git a/drivers/net/ethernet/sfc/txc43128_phy.c b/drivers/net/ethernet/sfc/txc43128_phy.c index 29bb3f9941c0..3d5ee3259885 100644 --- a/drivers/net/ethernet/sfc/txc43128_phy.c +++ b/drivers/net/ethernet/sfc/txc43128_phy.c @@ -1,5 +1,5 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2006-2011 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/net/ethernet/sfc/vfdi.h b/drivers/net/ethernet/sfc/vfdi.h index 225557caaf5a..ae044f44936a 100644 --- a/drivers/net/ethernet/sfc/vfdi.h +++ b/drivers/net/ethernet/sfc/vfdi.h @@ -1,5 +1,5 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2010-2012 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h index 7e5be1d873a7..2310b75d4ec2 100644 --- a/drivers/net/ethernet/sfc/workarounds.h +++ b/drivers/net/ethernet/sfc/workarounds.h @@ -1,6 +1,6 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2006-2010 Solarflare Communications Inc. + * Driver for Solarflare network controllers and boards + * Copyright 2006-2013 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -44,4 +44,10 @@ /* Leak overlength packets rather than free */ #define EFX_WORKAROUND_8071 EFX_WORKAROUND_FALCON_A +/* Lockup when writing event block registers at gen2/gen3 */ +#define EFX_EF10_WORKAROUND_35388(efx) \ + (((struct efx_ef10_nic_data *)efx->nic_data)->workaround_35388) +#define EFX_WORKAROUND_35388(efx) \ + (efx_nic_rev(efx) == EFX_REV_HUNT_A0 && EFX_EF10_WORKAROUND_35388(efx)) + #endif /* EFX_WORKAROUNDS_H */ diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 9f5f35e041ac..770036bc2d87 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -212,9 +212,8 @@ static void meth_check_link(struct net_device *dev) static int meth_init_tx_ring(struct meth_private *priv) { /* Init TX ring */ - priv->tx_ring = dma_alloc_coherent(NULL, TX_RING_BUFFER_SIZE, - &priv->tx_ring_dma, - GFP_ATOMIC | __GFP_ZERO); + priv->tx_ring = dma_zalloc_coherent(NULL, TX_RING_BUFFER_SIZE, + &priv->tx_ring_dma, GFP_ATOMIC); if (!priv->tx_ring) return -ENOMEM; diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 345558fe7367..afe01c4088a3 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -2067,7 +2067,7 @@ static int smc911x_drv_probe(struct platform_device *pdev) lp->netdev = ndev; #ifdef SMC_DYNAMIC_BUS_CONFIG { - struct smc911x_platdata *pd = pdev->dev.platform_data; + struct smc911x_platdata *pd = dev_get_platdata(&pdev->dev); if (!pd) { ret = -EINVAL; goto release_both; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index cde13be7c7de..73be7f3982e6 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2202,7 +2202,7 @@ static void smc_release_datacs(struct platform_device *pdev, struct net_device * */ static int smc_drv_probe(struct platform_device *pdev) { - struct smc91x_platdata *pd = pdev->dev.platform_data; + struct smc91x_platdata *pd = dev_get_platdata(&pdev->dev); struct smc_local *lp; struct net_device *ndev; struct resource *res, *ires; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index a1419211585b..5fdbc2686eb3 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2374,7 +2374,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct net_device *dev; struct smsc911x_data *pdata; - struct smsc911x_platform_config *config = pdev->dev.platform_data; + struct smsc911x_platform_config *config = dev_get_platdata(&pdev->dev); struct resource *res, *irq_res; unsigned int intcfg = 0; int res_size, irq_flags; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index be406911fd01..8d4ccd35a016 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1224,7 +1224,9 @@ static void free_dma_desc_resources(struct stmmac_priv *priv) */ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) { - if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) { + if (priv->plat->force_thresh_dma_mode) + priv->hw->dma->dma_mode(priv->ioaddr, tc, tc); + else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) { /* * In case of GMAC, SF mode can be enabled * to perform the TX COE in HW. This depends on: diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index da8be6e63096..623ebc50fe6b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -79,6 +79,11 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl); dma_cfg->fixed_burst = of_property_read_bool(np, "snps,fixed-burst"); dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst"); + plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode"); + if (plat->force_thresh_dma_mode) { + plat->force_sf_dma_mode = 0; + pr_warn("force_sf_dma_mode is ignored if force_thresh_dma_mode is set."); + } return 0; } @@ -113,7 +118,7 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) if (IS_ERR(addr)) return PTR_ERR(addr); - plat_dat = pdev->dev.platform_data; + plat_dat = dev_get_platdata(&pdev->dev); if (pdev->dev.of_node) { if (!plat_dat) plat_dat = devm_kzalloc(&pdev->dev, diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index fa322409bff3..269c08bf399b 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9360,7 +9360,7 @@ static ssize_t show_port_phy(struct device *dev, struct device_attribute *attr, char *buf) { struct platform_device *plat_dev = to_platform_device(dev); - struct niu_parent *p = plat_dev->dev.platform_data; + struct niu_parent *p = dev_get_platdata(&plat_dev->dev); u32 port_phy = p->port_phy; char *orig_buf = buf; int i; @@ -9390,7 +9390,7 @@ static ssize_t show_plat_type(struct device *dev, struct device_attribute *attr, char *buf) { struct platform_device *plat_dev = to_platform_device(dev); - struct niu_parent *p = plat_dev->dev.platform_data; + struct niu_parent *p = dev_get_platdata(&plat_dev->dev); const char *type_str; switch (p->plat_type) { @@ -9419,7 +9419,7 @@ static ssize_t __show_chan_per_port(struct device *dev, int rx) { struct platform_device *plat_dev = to_platform_device(dev); - struct niu_parent *p = plat_dev->dev.platform_data; + struct niu_parent *p = dev_get_platdata(&plat_dev->dev); char *orig_buf = buf; u8 *arr; int i; @@ -9452,7 +9452,7 @@ static ssize_t show_num_ports(struct device *dev, struct device_attribute *attr, char *buf) { struct platform_device *plat_dev = to_platform_device(dev); - struct niu_parent *p = plat_dev->dev.platform_data; + struct niu_parent *p = dev_get_platdata(&plat_dev->dev); return sprintf(buf, "%d\n", p->num_ports); } diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 31bbbca341a7..2dc16b6efaf0 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -636,7 +636,7 @@ static void cpmac_hw_stop(struct net_device *dev) { int i; struct cpmac_priv *priv = netdev_priv(dev); - struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data; + struct plat_cpmac_data *pdata = dev_get_platdata(&priv->pdev->dev); ar7_device_reset(pdata->reset_bit); cpmac_write(priv->regs, CPMAC_RX_CONTROL, @@ -659,7 +659,7 @@ static void cpmac_hw_start(struct net_device *dev) { int i; struct cpmac_priv *priv = netdev_priv(dev); - struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data; + struct plat_cpmac_data *pdata = dev_get_platdata(&priv->pdev->dev); ar7_device_reset(pdata->reset_bit); for (i = 0; i < 8; i++) { @@ -1118,7 +1118,7 @@ static int cpmac_probe(struct platform_device *pdev) struct net_device *dev; struct plat_cpmac_data *pdata; - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (external_switch || dumb_switch) { strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); /* fixed phys bus */ diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 1a222bce4bd7..67df09ea9d04 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1761,7 +1761,7 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) const u8 *mac_addr; if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node) - return pdev->dev.platform_data; + return dev_get_platdata(&pdev->dev); pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 7f8514384863..4ec92659a100 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -314,7 +314,7 @@ static int davinci_mdio_probe_dt(struct mdio_platform_data *data, static int davinci_mdio_probe(struct platform_device *pdev) { - struct mdio_platform_data *pdata = pdev->dev.platform_data; + struct mdio_platform_data *pdata = dev_get_platdata(&pdev->dev); struct device *dev = &pdev->dev; struct davinci_mdio_data *data; struct resource *res; diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 01bdc6ca0755..c4dbf981804b 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1308,13 +1308,13 @@ static int tsi108_open(struct net_device *dev) data->id, dev->irq, dev->name); } - data->rxring = dma_alloc_coherent(NULL, rxring_size, &data->rxdma, - GFP_KERNEL | __GFP_ZERO); + data->rxring = dma_zalloc_coherent(NULL, rxring_size, &data->rxdma, + GFP_KERNEL); if (!data->rxring) return -ENOMEM; - data->txring = dma_alloc_coherent(NULL, txring_size, &data->txdma, - GFP_KERNEL | __GFP_ZERO); + data->txring = dma_zalloc_coherent(NULL, txring_size, &data->txdma, + GFP_KERNEL); if (!data->txring) { pci_free_consistent(0, rxring_size, data->rxring, data->rxdma); return -ENOMEM; @@ -1558,7 +1558,7 @@ tsi108_init_one(struct platform_device *pdev) hw_info *einfo; int err = 0; - einfo = pdev->dev.platform_data; + einfo = dev_get_platdata(&pdev->dev); if (NULL == einfo) { printk(KERN_ERR "tsi-eth %d: Missing additional data!\n", diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index 30fed08d1674..0df36c6ec7f4 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -622,7 +622,7 @@ static const struct net_device_ops w5100_netdev_ops = { static int w5100_hw_probe(struct platform_device *pdev) { - struct wiznet_platform_data *data = pdev->dev.platform_data; + struct wiznet_platform_data *data = dev_get_platdata(&pdev->dev); struct net_device *ndev = platform_get_drvdata(pdev); struct w5100_priv *priv = netdev_priv(ndev); const char *name = netdev_name(ndev); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index e92884564e1e..71c27b3292f1 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -542,7 +542,7 @@ static const struct net_device_ops w5300_netdev_ops = { static int w5300_hw_probe(struct platform_device *pdev) { - struct wiznet_platform_data *data = pdev->dev.platform_data; + struct wiznet_platform_data *data = dev_get_platdata(&pdev->dev); struct net_device *ndev = platform_get_drvdata(pdev); struct w5300_priv *priv = netdev_priv(ndev); const char *name = netdev_name(ndev); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 58eb4488beff..b88121f240ca 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -243,15 +243,15 @@ static int temac_dma_bd_init(struct net_device *ndev) /* allocate the tx and rx ring buffer descriptors. */ /* returns a virtual address and a physical address. */ - lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent, - sizeof(*lp->tx_bd_v) * TX_BD_NUM, - &lp->tx_bd_p, GFP_KERNEL | __GFP_ZERO); + lp->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent, + sizeof(*lp->tx_bd_v) * TX_BD_NUM, + &lp->tx_bd_p, GFP_KERNEL); if (!lp->tx_bd_v) goto out; - lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent, - sizeof(*lp->rx_bd_v) * RX_BD_NUM, - &lp->rx_bd_p, GFP_KERNEL | __GFP_ZERO); + lp->rx_bd_v = dma_zalloc_coherent(ndev->dev.parent, + sizeof(*lp->rx_bd_v) * RX_BD_NUM, + &lp->rx_bd_p, GFP_KERNEL); if (!lp->rx_bd_v) goto out; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index fb7d1c28a2ea..b2ff038d6d20 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -201,17 +201,15 @@ static int axienet_dma_bd_init(struct net_device *ndev) /* * Allocate the Tx and Rx buffer descriptors. */ - lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent, - sizeof(*lp->tx_bd_v) * TX_BD_NUM, - &lp->tx_bd_p, - GFP_KERNEL | __GFP_ZERO); + lp->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent, + sizeof(*lp->tx_bd_v) * TX_BD_NUM, + &lp->tx_bd_p, GFP_KERNEL); if (!lp->tx_bd_v) goto out; - lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent, - sizeof(*lp->rx_bd_v) * RX_BD_NUM, - &lp->rx_bd_p, - GFP_KERNEL | __GFP_ZERO); + lp->rx_bd_v = dma_zalloc_coherent(ndev->dev.parent, + sizeof(*lp->rx_bd_v) * RX_BD_NUM, + &lp->rx_bd_p, GFP_KERNEL); if (!lp->rx_bd_v) goto out; diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 3d689fcb7917..e78802e75ea6 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1384,7 +1384,7 @@ static int eth_init_one(struct platform_device *pdev) { struct port *port; struct net_device *dev; - struct eth_plat_info *plat = pdev->dev.platform_data; + struct eth_plat_info *plat = dev_get_platdata(&pdev->dev); u32 regs_phys; char phy_id[MII_BUS_ID_SIZE + 3]; int err; diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index 4c8ddc944d51..0b40e1c46f07 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -1068,9 +1068,9 @@ static int dfx_driver_init(struct net_device *dev, const char *print_name, #endif sizeof(PI_CONSUMER_BLOCK) + (PI_ALIGN_K_DESC_BLK - 1); - bp->kmalloced = top_v = dma_alloc_coherent(bp->bus_dev, alloc_size, - &bp->kmalloced_dma, - GFP_ATOMIC | __GFP_ZERO); + bp->kmalloced = top_v = dma_zalloc_coherent(bp->bus_dev, alloc_size, + &bp->kmalloced_dma, + GFP_ATOMIC); if (top_v == NULL) return DFX_K_FAILURE; diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c index 3adb43ce138f..7bbd318bc93e 100644 --- a/drivers/net/irda/ali-ircc.c +++ b/drivers/net/irda/ali-ircc.c @@ -351,16 +351,16 @@ static int ali_ircc_open(int i, chipio_t *info) /* Allocate memory if needed */ self->rx_buff.head = - dma_alloc_coherent(NULL, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(NULL, self->rx_buff.truesize, + &self->rx_buff_dma, GFP_KERNEL); if (self->rx_buff.head == NULL) { err = -ENOMEM; goto err_out2; } self->tx_buff.head = - dma_alloc_coherent(NULL, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(NULL, self->tx_buff.truesize, + &self->tx_buff_dma, GFP_KERNEL); if (self->tx_buff.head == NULL) { err = -ENOMEM; goto err_out3; diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c index 9cf836b57c49..ceeb53737f86 100644 --- a/drivers/net/irda/nsc-ircc.c +++ b/drivers/net/irda/nsc-ircc.c @@ -430,8 +430,8 @@ static int __init nsc_ircc_open(chipio_t *info) /* Allocate memory if needed */ self->rx_buff.head = - dma_alloc_coherent(NULL, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(NULL, self->rx_buff.truesize, + &self->rx_buff_dma, GFP_KERNEL); if (self->rx_buff.head == NULL) { err = -ENOMEM; goto out2; @@ -439,8 +439,8 @@ static int __init nsc_ircc_open(chipio_t *info) } self->tx_buff.head = - dma_alloc_coherent(NULL, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(NULL, self->tx_buff.truesize, + &self->tx_buff_dma, GFP_KERNEL); if (self->tx_buff.head == NULL) { err = -ENOMEM; goto out3; diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c index aa05dad75335..0dcdf1592f6b 100644 --- a/drivers/net/irda/smsc-ircc2.c +++ b/drivers/net/irda/smsc-ircc2.c @@ -562,14 +562,14 @@ static int smsc_ircc_open(unsigned int fir_base, unsigned int sir_base, u8 dma, self->tx_buff.truesize = SMSC_IRCC2_TX_BUFF_TRUESIZE; self->rx_buff.head = - dma_alloc_coherent(NULL, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(NULL, self->rx_buff.truesize, + &self->rx_buff_dma, GFP_KERNEL); if (self->rx_buff.head == NULL) goto err_out2; self->tx_buff.head = - dma_alloc_coherent(NULL, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(NULL, self->tx_buff.truesize, + &self->tx_buff_dma, GFP_KERNEL); if (self->tx_buff.head == NULL) goto err_out3; diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c index 2dcc60fb37f1..9abaec27f962 100644 --- a/drivers/net/irda/via-ircc.c +++ b/drivers/net/irda/via-ircc.c @@ -361,16 +361,16 @@ static int via_ircc_open(struct pci_dev *pdev, chipio_t *info, unsigned int id) /* Allocate memory if needed */ self->rx_buff.head = - dma_alloc_coherent(&pdev->dev, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(&pdev->dev, self->rx_buff.truesize, + &self->rx_buff_dma, GFP_KERNEL); if (self->rx_buff.head == NULL) { err = -ENOMEM; goto err_out2; } self->tx_buff.head = - dma_alloc_coherent(&pdev->dev, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(&pdev->dev, self->tx_buff.truesize, + &self->tx_buff_dma, GFP_KERNEL); if (self->tx_buff.head == NULL) { err = -ENOMEM; goto err_out3; diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c index bb8857a158a6..e641bb240362 100644 --- a/drivers/net/irda/w83977af_ir.c +++ b/drivers/net/irda/w83977af_ir.c @@ -215,16 +215,16 @@ static int w83977af_open(int i, unsigned int iobase, unsigned int irq, /* Allocate memory if needed */ self->rx_buff.head = - dma_alloc_coherent(NULL, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(NULL, self->rx_buff.truesize, + &self->rx_buff_dma, GFP_KERNEL); if (self->rx_buff.head == NULL) { err = -ENOMEM; goto err_out1; } self->tx_buff.head = - dma_alloc_coherent(NULL, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); + dma_zalloc_coherent(NULL, self->tx_buff.truesize, + &self->tx_buff_dma, GFP_KERNEL); if (self->tx_buff.head == NULL) { err = -ENOMEM; goto err_out2; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 510a9b60fde1..64dfaa303dcc 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -686,7 +686,7 @@ void macvlan_common_setup(struct net_device *dev) dev->priv_flags |= IFF_UNICAST_FLT; dev->netdev_ops = &macvlan_netdev_ops; dev->destructor = free_netdev; - dev->header_ops = &macvlan_hard_header_ops, + dev->header_ops = &macvlan_hard_header_ops; dev->ethtool_ops = &macvlan_ethtool_ops; } EXPORT_SYMBOL_GPL(macvlan_common_setup); @@ -823,7 +823,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, if (port->count) return -EINVAL; port->passthru = true; - memcpy(dev->dev_addr, lowerdev->dev_addr, ETH_ALEN); + eth_hw_addr_inherit(dev, lowerdev); } err = netdev_upper_dev_link(lowerdev, dev); diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index a47f9236d966..8004acbef2c9 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -191,7 +191,7 @@ static int mdio_gpio_probe(struct platform_device *pdev) pdata = mdio_gpio_of_get_data(pdev); bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio"); } else { - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); bus_id = pdev->id; } diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c index e91d7d736ae2..d2dd9e473e2c 100644 --- a/drivers/net/phy/mdio-mux-gpio.c +++ b/drivers/net/phy/mdio-mux-gpio.c @@ -106,7 +106,7 @@ err: static int mdio_mux_gpio_remove(struct platform_device *pdev) { - struct mdio_mux_gpio_state *s = pdev->dev.platform_data; + struct mdio_mux_gpio_state *s = dev_get_platdata(&pdev->dev); mdio_mux_uninit(s->mux_handle); return 0; } diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c index 7f25e49ae37f..18969b3ad8bb 100644 --- a/drivers/net/phy/mdio-sun4i.c +++ b/drivers/net/phy/mdio-sun4i.c @@ -101,6 +101,7 @@ static int sun4i_mdio_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct mii_bus *bus; struct sun4i_mdio_data *data; + struct resource *res; int ret, i; bus = mdiobus_alloc_size(sizeof(*data)); @@ -114,7 +115,8 @@ static int sun4i_mdio_probe(struct platform_device *pdev) snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(&pdev->dev)); bus->parent = &pdev->dev; - bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); + bus->irq = devm_kzalloc(&pdev->dev, sizeof(int) * PHY_MAX_ADDR, + GFP_KERNEL); if (!bus->irq) { ret = -ENOMEM; goto err_out_free_mdiobus; @@ -124,10 +126,11 @@ static int sun4i_mdio_probe(struct platform_device *pdev) bus->irq[i] = PHY_POLL; data = bus->priv; - data->membase = of_iomap(np, 0); - if (!data->membase) { - ret = -ENOMEM; - goto err_out_free_mdio_irq; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->membase = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(data->membase)) { + ret = PTR_ERR(data->membase); + goto err_out_free_mdiobus; } data->regulator = devm_regulator_get(&pdev->dev, "phy"); @@ -139,7 +142,7 @@ static int sun4i_mdio_probe(struct platform_device *pdev) } else { ret = regulator_enable(data->regulator); if (ret) - goto err_out_free_mdio_irq; + goto err_out_free_mdiobus; } ret = of_mdiobus_register(bus, np); @@ -152,8 +155,6 @@ static int sun4i_mdio_probe(struct platform_device *pdev) err_out_disable_regulator: regulator_disable(data->regulator); -err_out_free_mdio_irq: - kfree(bus->irq); err_out_free_mdiobus: mdiobus_free(bus); return ret; @@ -164,7 +165,6 @@ static int sun4i_mdio_remove(struct platform_device *pdev) struct mii_bus *bus = platform_get_drvdata(pdev); mdiobus_unregister(bus); - kfree(bus->irq); mdiobus_free(bus); return 0; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 9ccccd40c410..50e43e64d51d 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1974,7 +1974,7 @@ static void team_setup_by_port(struct net_device *dev, dev->addr_len = port_dev->addr_len; dev->mtu = port_dev->mtu; memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len); - memcpy(dev->dev_addr, port_dev->dev_addr, port_dev->addr_len); + eth_hw_addr_inherit(dev, port_dev); } static int team_dev_type_check_change(struct net_device *dev, diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f21600277583..defec2b3c5a4 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1538,6 +1538,8 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO); /* (!csum && gso) case will be fixed by register_netdev() */ } + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) + dev->features |= NETIF_F_RXCSUM; dev->vlan_features = dev->features; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3b21aca0c0c2..6b560f373fc3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -6,9 +6,6 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * TODO - * - IPv6 (not in RFC) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -43,6 +40,12 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/vxlan.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/addrconf.h> +#include <net/ip6_tunnel.h> +#include <net/ip6_checksum.h> +#endif #define VXLAN_VERSION "0.1" @@ -59,6 +62,8 @@ #define VXLAN_VID_MASK (VXLAN_N_VID - 1) /* IP header + UDP + VXLAN + Ethernet header */ #define VXLAN_HEADROOM (20 + 8 + 8 + 14) +/* IPv6 header + UDP + VXLAN + Ethernet header */ +#define VXLAN6_HEADROOM (40 + 8 + 8 + 14) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) #define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */ @@ -92,8 +97,14 @@ struct vxlan_net { spinlock_t sock_lock; }; +union vxlan_addr { + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + struct sockaddr sa; +}; + struct vxlan_rdst { - __be32 remote_ip; + union vxlan_addr remote_ip; __be16 remote_port; u32 remote_vni; u32 remote_ifindex; @@ -120,7 +131,7 @@ struct vxlan_dev { struct vxlan_sock *vn_sock; /* listening socket */ struct net_device *dev; struct vxlan_rdst default_dst; /* default destination */ - __be32 saddr; /* source address */ + union vxlan_addr saddr; /* source address */ __be16 dst_port; __u16 port_min; /* source port range */ __u16 port_max; @@ -146,6 +157,7 @@ struct vxlan_dev { #define VXLAN_F_RSC 0x04 #define VXLAN_F_L2MISS 0x08 #define VXLAN_F_L3MISS 0x10 +#define VXLAN_F_IPV6 0x20 /* internal flag */ /* salt for hash table */ static u32 vxlan_salt __read_mostly; @@ -153,6 +165,96 @@ static struct workqueue_struct *vxlan_wq; static void vxlan_sock_work(struct work_struct *work); +#if IS_ENABLED(CONFIG_IPV6) +static inline +bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) +{ + if (a->sa.sa_family != b->sa.sa_family) + return false; + if (a->sa.sa_family == AF_INET6) + return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr); + else + return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; +} + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_any(&ipa->sin6.sin6_addr); + else + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); + else + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) +{ + if (nla_len(nla) >= sizeof(struct in6_addr)) { + nla_memcpy(&ip->sin6.sin6_addr, nla, sizeof(struct in6_addr)); + ip->sa.sa_family = AF_INET6; + return 0; + } else if (nla_len(nla) >= sizeof(__be32)) { + ip->sin.sin_addr.s_addr = nla_get_be32(nla); + ip->sa.sa_family = AF_INET; + return 0; + } else { + return -EAFNOSUPPORT; + } +} + +static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, + const union vxlan_addr *ip) +{ + if (ip->sa.sa_family == AF_INET6) + return nla_put(skb, attr, sizeof(struct in6_addr), &ip->sin6.sin6_addr); + else + return nla_put_be32(skb, attr, ip->sin.sin_addr.s_addr); +} + +#else /* !CONFIG_IPV6 */ + +static inline +bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) +{ + return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; +} + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) +{ + if (nla_len(nla) >= sizeof(struct in6_addr)) { + return -EAFNOSUPPORT; + } else if (nla_len(nla) >= sizeof(__be32)) { + ip->sin.sin_addr.s_addr = nla_get_be32(nla); + ip->sa.sa_family = AF_INET; + return 0; + } else { + return -EAFNOSUPPORT; + } +} + +static int vxlan_nla_put_addr(struct sk_buff *skb, int attr, + const union vxlan_addr *ip) +{ + return nla_put_be32(skb, attr, ip->sin.sin_addr.s_addr); +} +#endif + /* Virtual Network hash table head */ static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id) { @@ -239,7 +341,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (type == RTM_GETNEIGH) { ndm->ndm_family = AF_INET; - send_ip = rdst->remote_ip != htonl(INADDR_ANY); + send_ip = !vxlan_addr_any(&rdst->remote_ip); send_eth = !is_zero_ether_addr(fdb->eth_addr); } else ndm->ndm_family = AF_BRIDGE; @@ -251,7 +353,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; - if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip)) + if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip)) goto nla_put_failure; if (rdst->remote_port && rdst->remote_port != vxlan->dst_port && @@ -283,7 +385,7 @@ static inline size_t vxlan_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ - + nla_total_size(sizeof(__be32)) /* NDA_DST */ + + nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */ + nla_total_size(sizeof(__be16)) /* NDA_PORT */ + nla_total_size(sizeof(__be32)) /* NDA_VNI */ + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */ @@ -317,14 +419,14 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } -static void vxlan_ip_miss(struct net_device *dev, __be32 ipa) +static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb f = { .state = NUD_STALE, }; struct vxlan_rdst remote = { - .remote_ip = ipa, /* goes to NDA_DST */ + .remote_ip = *ipa, /* goes to NDA_DST */ .remote_vni = VXLAN_N_VID, }; @@ -397,13 +499,13 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, /* caller should hold vxlan->hash_lock */ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, - __be32 ip, __be16 port, + union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex) { struct vxlan_rdst *rd; list_for_each_entry(rd, &f->remotes, list) { - if (rd->remote_ip == ip && + if (vxlan_addr_equal(&rd->remote_ip, ip) && rd->remote_port == port && rd->remote_vni == vni && rd->remote_ifindex == ifindex) @@ -415,7 +517,7 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, /* Replace destination of unicast mac */ static int vxlan_fdb_replace(struct vxlan_fdb *f, - __be32 ip, __be16 port, __u32 vni, __u32 ifindex) + union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex) { struct vxlan_rdst *rd; @@ -426,7 +528,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f, rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list); if (!rd) return 0; - rd->remote_ip = ip; + rd->remote_ip = *ip; rd->remote_port = port; rd->remote_vni = vni; rd->remote_ifindex = ifindex; @@ -435,7 +537,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f, /* Add/update destinations for multicast */ static int vxlan_fdb_append(struct vxlan_fdb *f, - __be32 ip, __be16 port, __u32 vni, __u32 ifindex) + union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex) { struct vxlan_rdst *rd; @@ -446,7 +548,7 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, rd = kmalloc(sizeof(*rd), GFP_ATOMIC); if (rd == NULL) return -ENOBUFS; - rd->remote_ip = ip; + rd->remote_ip = *ip; rd->remote_port = port; rd->remote_vni = vni; rd->remote_ifindex = ifindex; @@ -458,7 +560,7 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, /* Add new entry to forwarding table -- assumes lock held */ static int vxlan_fdb_create(struct vxlan_dev *vxlan, - const u8 *mac, __be32 ip, + const u8 *mac, union vxlan_addr *ip, __u16 state, __u16 flags, __be16 port, __u32 vni, __u32 ifindex, __u8 ndm_flags) @@ -517,7 +619,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac))) return -EOPNOTSUPP; - netdev_dbg(vxlan->dev, "add %pM -> %pI4\n", mac, &ip); + netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); f = kmalloc(sizeof(*f), GFP_ATOMIC); if (!f) return -ENOMEM; @@ -565,17 +667,26 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) } static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, - __be32 *ip, __be16 *port, u32 *vni, u32 *ifindex) + union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex) { struct net *net = dev_net(vxlan->dev); + int err; if (tb[NDA_DST]) { - if (nla_len(tb[NDA_DST]) != sizeof(__be32)) - return -EAFNOSUPPORT; - - *ip = nla_get_be32(tb[NDA_DST]); + err = vxlan_nla_get_addr(ip, tb[NDA_DST]); + if (err) + return err; } else { - *ip = htonl(INADDR_ANY); + union vxlan_addr *remote = &vxlan->default_dst.remote_ip; + if (remote->sa.sa_family == AF_INET) { + ip->sin.sin_addr.s_addr = htonl(INADDR_ANY); + ip->sa.sa_family = AF_INET; +#if IS_ENABLED(CONFIG_IPV6) + } else { + ip->sin6.sin6_addr = in6addr_any; + ip->sa.sa_family = AF_INET6; +#endif + } } if (tb[NDA_PORT]) { @@ -618,7 +729,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct vxlan_dev *vxlan = netdev_priv(dev); /* struct net *net = dev_net(vxlan->dev); */ - __be32 ip; + union vxlan_addr ip; __be16 port; u32 vni, ifindex; int err; @@ -637,7 +748,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; spin_lock_bh(&vxlan->hash_lock); - err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, + err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags, port, vni, ifindex, ndm->ndm_flags); spin_unlock_bh(&vxlan->hash_lock); @@ -652,7 +763,7 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f; struct vxlan_rdst *rd = NULL; - __be32 ip; + union vxlan_addr ip; __be16 port; u32 vni, ifindex; int err; @@ -668,8 +779,8 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], if (!f) goto out; - if (ip != htonl(INADDR_ANY)) { - rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex); + if (!vxlan_addr_any(&ip)) { + rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex); if (!rd) goto out; } @@ -732,7 +843,7 @@ out: * Return true if packet is bogus and should be droppped. */ static bool vxlan_snoop(struct net_device *dev, - __be32 src_ip, const u8 *src_mac) + union vxlan_addr *src_ip, const u8 *src_mac) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f; @@ -741,7 +852,7 @@ static bool vxlan_snoop(struct net_device *dev, if (likely(f)) { struct vxlan_rdst *rdst = first_remote_rcu(f); - if (likely(rdst->remote_ip == src_ip)) + if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip))) return false; /* Don't migrate static entries, drop packets */ @@ -750,10 +861,10 @@ static bool vxlan_snoop(struct net_device *dev, if (net_ratelimit()) netdev_info(dev, - "%pM migrated from %pI4 to %pI4\n", + "%pM migrated from %pIS to %pIS\n", src_mac, &rdst->remote_ip, &src_ip); - rdst->remote_ip = src_ip; + rdst->remote_ip = *src_ip; f->updated = jiffies; vxlan_fdb_notify(vxlan, f, RTM_NEWNEIGH); } else { @@ -775,7 +886,7 @@ static bool vxlan_snoop(struct net_device *dev, } /* See if multicast group is already in use by other ID */ -static bool vxlan_group_used(struct vxlan_net *vn, __be32 remote_ip) +static bool vxlan_group_used(struct vxlan_net *vn, union vxlan_addr *remote_ip) { struct vxlan_dev *vxlan; @@ -783,7 +894,8 @@ static bool vxlan_group_used(struct vxlan_net *vn, __be32 remote_ip) if (!netif_running(vxlan->dev)) continue; - if (vxlan->default_dst.remote_ip == remote_ip) + if (vxlan_addr_equal(&vxlan->default_dst.remote_ip, + remote_ip)) return true; } @@ -819,13 +931,23 @@ static void vxlan_igmp_join(struct work_struct *work) struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join); struct vxlan_sock *vs = vxlan->vn_sock; struct sock *sk = vs->sock->sk; - struct ip_mreqn mreq = { - .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, - .imr_ifindex = vxlan->default_dst.remote_ifindex, - }; + union vxlan_addr *ip = &vxlan->default_dst.remote_ip; + int ifindex = vxlan->default_dst.remote_ifindex; lock_sock(sk); - ip_mc_join_group(sk, &mreq); + if (ip->sa.sa_family == AF_INET) { + struct ip_mreqn mreq = { + .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, + .imr_ifindex = ifindex, + }; + + ip_mc_join_group(sk, &mreq); +#if IS_ENABLED(CONFIG_IPV6) + } else { + ipv6_stub->ipv6_sock_mc_join(sk, ifindex, + &ip->sin6.sin6_addr); +#endif + } release_sock(sk); vxlan_sock_release(vs); @@ -838,13 +960,24 @@ static void vxlan_igmp_leave(struct work_struct *work) struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave); struct vxlan_sock *vs = vxlan->vn_sock; struct sock *sk = vs->sock->sk; - struct ip_mreqn mreq = { - .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, - .imr_ifindex = vxlan->default_dst.remote_ifindex, - }; + union vxlan_addr *ip = &vxlan->default_dst.remote_ip; + int ifindex = vxlan->default_dst.remote_ifindex; lock_sock(sk); - ip_mc_leave_group(sk, &mreq); + if (ip->sa.sa_family == AF_INET) { + struct ip_mreqn mreq = { + .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, + .imr_ifindex = ifindex, + }; + + ip_mc_leave_group(sk, &mreq); +#if IS_ENABLED(CONFIG_IPV6) + } else { + ipv6_stub->ipv6_sock_mc_drop(sk, ifindex, + &ip->sin6.sin6_addr); +#endif + } + release_sock(sk); vxlan_sock_release(vs); @@ -896,11 +1029,14 @@ error: static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) { - struct iphdr *oip; + struct iphdr *oip = NULL; + struct ipv6hdr *oip6 = NULL; struct vxlan_dev *vxlan; struct pcpu_tstats *stats; + union vxlan_addr saddr; __u32 vni; - int err; + int err = 0; + union vxlan_addr *remote_ip; vni = ntohl(vx_vni) >> 8; /* Is this VNI defined? */ @@ -908,6 +1044,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, if (!vxlan) goto drop; + remote_ip = &vxlan->default_dst.remote_ip; skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, vxlan->dev); @@ -917,9 +1054,20 @@ static void vxlan_rcv(struct vxlan_sock *vs, goto drop; /* Re-examine inner Ethernet packet */ - oip = ip_hdr(skb); + if (remote_ip->sa.sa_family == AF_INET) { + oip = ip_hdr(skb); + saddr.sin.sin_addr.s_addr = oip->saddr; + saddr.sa.sa_family = AF_INET; +#if IS_ENABLED(CONFIG_IPV6) + } else { + oip6 = ipv6_hdr(skb); + saddr.sin6.sin6_addr = oip6->saddr; + saddr.sa.sa_family = AF_INET6; +#endif + } + if ((vxlan->flags & VXLAN_F_LEARN) && - vxlan_snoop(skb->dev, oip->saddr, eth_hdr(skb)->h_source)) + vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) goto drop; skb_reset_network_header(skb); @@ -935,11 +1083,20 @@ static void vxlan_rcv(struct vxlan_sock *vs, skb->encapsulation = 0; - err = IP_ECN_decapsulate(oip, skb); + if (oip6) + err = IP6_ECN_decapsulate(oip6, skb); + if (oip) + err = IP_ECN_decapsulate(oip, skb); + if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &oip->saddr, oip->tos); + if (log_ecn_error) { + if (oip6) + net_info_ratelimited("non-ECT from %pI6\n", + &oip6->saddr); + if (oip) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &oip->saddr, oip->tos); + } if (err > 1) { ++vxlan->dev->stats.rx_frame_errors; ++vxlan->dev->stats.rx_errors; @@ -1009,7 +1166,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) } f = vxlan_find_mac(vxlan, n->ha); - if (f && first_remote_rcu(f)->remote_ip == htonl(INADDR_ANY)) { + if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { /* bridge-local neighbor */ neigh_release(n); goto out; @@ -1027,18 +1184,87 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) if (netif_rx_ni(reply) == NET_RX_DROP) dev->stats.rx_dropped++; - } else if (vxlan->flags & VXLAN_F_L3MISS) - vxlan_ip_miss(dev, tip); + } else if (vxlan->flags & VXLAN_F_L3MISS) { + union vxlan_addr ipa = { + .sin.sin_addr.s_addr = tip, + .sa.sa_family = AF_INET, + }; + + vxlan_ip_miss(dev, &ipa); + } out: consume_skb(skb); return NETDEV_TX_OK; } +#if IS_ENABLED(CONFIG_IPV6) +static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct neighbour *n; + union vxlan_addr ipa; + const struct ipv6hdr *iphdr; + const struct in6_addr *saddr, *daddr; + struct nd_msg *msg; + struct inet6_dev *in6_dev = NULL; + + in6_dev = __in6_dev_get(dev); + if (!in6_dev) + goto out; + + if (!pskb_may_pull(skb, skb->len)) + goto out; + + iphdr = ipv6_hdr(skb); + saddr = &iphdr->saddr; + daddr = &iphdr->daddr; + + if (ipv6_addr_loopback(daddr) || + ipv6_addr_is_multicast(daddr)) + goto out; + + msg = (struct nd_msg *)skb_transport_header(skb); + if (msg->icmph.icmp6_code != 0 || + msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + goto out; + + n = neigh_lookup(ipv6_stub->nd_tbl, daddr, dev); + + if (n) { + struct vxlan_fdb *f; + + if (!(n->nud_state & NUD_CONNECTED)) { + neigh_release(n); + goto out; + } + + f = vxlan_find_mac(vxlan, n->ha); + if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { + /* bridge-local neighbor */ + neigh_release(n); + goto out; + } + + ipv6_stub->ndisc_send_na(dev, n, saddr, &msg->target, + !!in6_dev->cnf.forwarding, + true, false, false); + neigh_release(n); + } else if (vxlan->flags & VXLAN_F_L3MISS) { + ipa.sin6.sin6_addr = *daddr; + ipa.sa.sa_family = AF_INET6; + vxlan_ip_miss(dev, &ipa); + } + +out: + consume_skb(skb); + return NETDEV_TX_OK; +} +#endif + static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) { struct vxlan_dev *vxlan = netdev_priv(dev); struct neighbour *n; - struct iphdr *pip; if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) return false; @@ -1046,11 +1272,47 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) n = NULL; switch (ntohs(eth_hdr(skb)->h_proto)) { case ETH_P_IP: + { + struct iphdr *pip; + if (!pskb_may_pull(skb, sizeof(struct iphdr))) return false; pip = ip_hdr(skb); n = neigh_lookup(&arp_tbl, &pip->daddr, dev); + if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { + union vxlan_addr ipa = { + .sin.sin_addr.s_addr = pip->daddr, + .sa.sa_family = AF_INET, + }; + + vxlan_ip_miss(dev, &ipa); + return false; + } + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case ETH_P_IPV6: + { + struct ipv6hdr *pip6; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return false; + pip6 = ipv6_hdr(skb); + n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev); + if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { + union vxlan_addr ipa = { + .sin6.sin6_addr = pip6->daddr, + .sa.sa_family = AF_INET6, + }; + + vxlan_ip_miss(dev, &ipa); + return false; + } + + break; + } +#endif default: return false; } @@ -1066,8 +1328,8 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) } neigh_release(n); return diff; - } else if (vxlan->flags & VXLAN_F_L3MISS) - vxlan_ip_miss(dev, pip->daddr); + } + return false; } @@ -1118,6 +1380,102 @@ static int handle_offloads(struct sk_buff *skb) return 0; } +#if IS_ENABLED(CONFIG_IPV6) +static int vxlan6_xmit_skb(struct net *net, struct vxlan_sock *vs, + struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, __u8 prio, __u8 ttl, + __be16 src_port, __be16 dst_port, __be32 vni) +{ + struct ipv6hdr *ip6h; + struct vxlanhdr *vxh; + struct udphdr *uh; + int min_headroom; + int err; + + if (!skb->encapsulation) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + + VXLAN_HLEN + sizeof(struct ipv6hdr) + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + /* Need space for new headers (invalidates iph ptr) */ + err = skb_cow_head(skb, min_headroom); + if (unlikely(err)) + return err; + + if (vlan_tx_tag_present(skb)) { + if (WARN_ON(!__vlan_put_tag(skb, + skb->vlan_proto, + vlan_tx_tag_get(skb)))) + return -ENOMEM; + + skb->vlan_tci = 0; + } + + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); + vxh->vx_flags = htonl(VXLAN_FLAGS); + vxh->vx_vni = vni; + + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + + uh->dest = dst_port; + uh->source = src_port; + + uh->len = htons(skb->len); + uh->check = 0; + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) { + __wsum csum = skb_checksum(skb, 0, skb->len, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + uh->check = csum_ipv6_magic(saddr, daddr, skb->len, + IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_ipv6_magic(saddr, daddr, + skb->len, IPPROTO_UDP, 0); + } + + __skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6h->version = 6; + ip6h->priority = prio; + ip6h->flow_lbl[0] = 0; + ip6h->flow_lbl[1] = 0; + ip6h->flow_lbl[2] = 0; + ip6h->payload_len = htons(skb->len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + + vxlan_set_owner(vs->sock->sk, skb); + + err = handle_offloads(skb); + if (err) + return err; + + ip6tunnel_xmit(skb, dev); + return 0; +} +#endif + int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, @@ -1182,15 +1540,26 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, { struct pcpu_tstats *tx_stats = this_cpu_ptr(src_vxlan->dev->tstats); struct pcpu_tstats *rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats); + union vxlan_addr loopback; + union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip; skb->pkt_type = PACKET_HOST; skb->encapsulation = 0; skb->dev = dst_vxlan->dev; __skb_pull(skb, skb_network_offset(skb)); + if (remote_ip->sa.sa_family == AF_INET) { + loopback.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + loopback.sa.sa_family = AF_INET; +#if IS_ENABLED(CONFIG_IPV6) + } else { + loopback.sin6.sin6_addr = in6addr_loopback; + loopback.sa.sa_family = AF_INET6; +#endif + } + if (dst_vxlan->flags & VXLAN_F_LEARN) - vxlan_snoop(skb->dev, htonl(INADDR_LOOPBACK), - eth_hdr(skb)->h_source); + vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source); u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; @@ -1211,11 +1580,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_rdst *rdst, bool did_rsc) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct rtable *rt; + struct rtable *rt = NULL; const struct iphdr *old_iph; struct flowi4 fl4; - __be32 dst; - __be16 src_port, dst_port; + union vxlan_addr *dst; + __be16 src_port = 0, dst_port; u32 vni; __be16 df = 0; __u8 tos, ttl; @@ -1223,9 +1592,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port; vni = rdst->remote_vni; - dst = rdst->remote_ip; + dst = &rdst->remote_ip; - if (!dst) { + if (vxlan_addr_any(dst)) { if (did_rsc) { /* short-circuited back to local bridge */ vxlan_encap_bypass(skb, vxlan, vxlan); @@ -1237,7 +1606,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, old_iph = ip_hdr(skb); ttl = vxlan->ttl; - if (!ttl && IN_MULTICAST(ntohl(dst))) + if (!ttl && vxlan_addr_multicast(dst)) ttl = 1; tos = vxlan->tos; @@ -1246,48 +1615,101 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, src_port = vxlan_src_port(vxlan->port_min, vxlan->port_max, skb); - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = rdst->remote_ifindex; - fl4.flowi4_tos = RT_TOS(tos); - fl4.daddr = dst; - fl4.saddr = vxlan->saddr; - - rt = ip_route_output_key(dev_net(dev), &fl4); - if (IS_ERR(rt)) { - netdev_dbg(dev, "no route to %pI4\n", &dst); - dev->stats.tx_carrier_errors++; - goto tx_error; - } + if (dst->sa.sa_family == AF_INET) { + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_oif = rdst->remote_ifindex; + fl4.flowi4_tos = RT_TOS(tos); + fl4.daddr = dst->sin.sin_addr.s_addr; + fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr; + + rt = ip_route_output_key(dev_net(dev), &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", + &dst->sin.sin_addr.s_addr); + dev->stats.tx_carrier_errors++; + goto tx_error; + } - if (rt->dst.dev == dev) { - netdev_dbg(dev, "circular route to %pI4\n", &dst); - dev->stats.collisions++; - goto rt_tx_error; - } + if (rt->dst.dev == dev) { + netdev_dbg(dev, "circular route to %pI4\n", + &dst->sin.sin_addr.s_addr); + dev->stats.collisions++; + goto tx_error; + } - /* Bypass encapsulation if the destination is local */ - if (rt->rt_flags & RTCF_LOCAL && - !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { - struct vxlan_dev *dst_vxlan; + /* Bypass encapsulation if the destination is local */ + if (rt->rt_flags & RTCF_LOCAL && + !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { + struct vxlan_dev *dst_vxlan; - ip_rt_put(rt); - dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); - if (!dst_vxlan) + ip_rt_put(rt); + dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); + if (!dst_vxlan) + goto tx_error; + vxlan_encap_bypass(skb, vxlan, dst_vxlan); + return; + } + + tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + + err = vxlan_xmit_skb(dev_net(dev), vxlan->vn_sock, rt, skb, + fl4.saddr, dst->sin.sin_addr.s_addr, + tos, ttl, df, src_port, dst_port, + htonl(vni << 8)); + + if (err < 0) + goto rt_tx_error; + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct sock *sk = vxlan->vn_sock->sock->sk; + struct dst_entry *ndst; + struct flowi6 fl6; + u32 flags; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = rdst->remote_ifindex; + fl6.daddr = dst->sin6.sin6_addr; + fl6.saddr = vxlan->saddr.sin6.sin6_addr; + fl6.flowi6_proto = IPPROTO_UDP; + + if (ipv6_stub->ipv6_dst_lookup(sk, &ndst, &fl6)) { + netdev_dbg(dev, "no route to %pI6\n", + &dst->sin6.sin6_addr); + dev->stats.tx_carrier_errors++; goto tx_error; - vxlan_encap_bypass(skb, vxlan, dst_vxlan); - return; - } + } + + if (ndst->dev == dev) { + netdev_dbg(dev, "circular route to %pI6\n", + &dst->sin6.sin6_addr); + dst_release(ndst); + dev->stats.collisions++; + goto tx_error; + } - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); - ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + /* Bypass encapsulation if the destination is local */ + flags = ((struct rt6_info *)ndst)->rt6i_flags; + if (flags & RTF_LOCAL && + !(flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { + struct vxlan_dev *dst_vxlan; + + dst_release(ndst); + dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); + if (!dst_vxlan) + goto tx_error; + vxlan_encap_bypass(skb, vxlan, dst_vxlan); + return; + } - err = vxlan_xmit_skb(dev_net(dev), vxlan->vn_sock, rt, skb, - fl4.saddr, dst, tos, ttl, df, - src_port, dst_port, htonl(vni << 8)); + ttl = ttl ? : ip6_dst_hoplimit(ndst); - if (err < 0) - goto rt_tx_error; - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + err = vxlan6_xmit_skb(dev_net(dev), vxlan->vn_sock, ndst, skb, + dev, &fl6.saddr, &fl6.daddr, 0, ttl, + src_port, dst_port, htonl(vni << 8)); +#endif + } return; @@ -1320,14 +1742,29 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); eth = eth_hdr(skb); - if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) - return arp_reduce(dev, skb); + if ((vxlan->flags & VXLAN_F_PROXY)) { + if (ntohs(eth->h_proto) == ETH_P_ARP) + return arp_reduce(dev, skb); +#if IS_ENABLED(CONFIG_IPV6) + else if (ntohs(eth->h_proto) == ETH_P_IPV6 && + skb->len >= sizeof(struct ipv6hdr) + sizeof(struct nd_msg) && + ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { + struct nd_msg *msg; + + msg = (struct nd_msg *)skb_transport_header(skb); + if (msg->icmph.icmp6_code == 0 && + msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) + return neigh_reduce(dev, skb); + } +#endif + } f = vxlan_find_mac(vxlan, eth->h_dest); did_rsc = false; if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) && - ntohs(eth->h_proto) == ETH_P_IP) { + (ntohs(eth->h_proto) == ETH_P_IP || + ntohs(eth->h_proto) == ETH_P_IPV6)) { did_rsc = route_shortcircuit(dev, skb); if (did_rsc) f = vxlan_find_mac(vxlan, eth->h_dest); @@ -1464,8 +1901,8 @@ static int vxlan_open(struct net_device *dev) if (!vs) return -ENOTCONN; - if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)) && - vxlan_group_used(vn, vxlan->default_dst.remote_ip)) { + if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && + vxlan_group_used(vn, &vxlan->default_dst.remote_ip)) { vxlan_sock_hold(vs); dev_hold(dev); queue_work(vxlan_wq, &vxlan->igmp_join); @@ -1503,8 +1940,8 @@ static int vxlan_stop(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_sock *vs = vxlan->vn_sock; - if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)) && - ! vxlan_group_used(vn, vxlan->default_dst.remote_ip)) { + if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && + ! vxlan_group_used(vn, &vxlan->default_dst.remote_ip)) { vxlan_sock_hold(vs); dev_hold(dev); queue_work(vxlan_wq, &vxlan->igmp_leave); @@ -1552,7 +1989,10 @@ static void vxlan_setup(struct net_device *dev) eth_hw_addr_random(dev); ether_setup(dev); - dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM; + if (vxlan->default_dst.remote_ip.sa.sa_family == AF_INET6) + dev->hard_header_len = ETH_HLEN + VXLAN6_HEADROOM; + else + dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM; dev->netdev_ops = &vxlan_netdev_ops; dev->destructor = free_netdev; @@ -1597,8 +2037,10 @@ static void vxlan_setup(struct net_device *dev) static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_ID] = { .type = NLA_U32 }, [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) }, [IFLA_VXLAN_LINK] = { .type = NLA_U32 }, [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, + [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) }, [IFLA_VXLAN_TOS] = { .type = NLA_U8 }, [IFLA_VXLAN_TTL] = { .type = NLA_U8 }, [IFLA_VXLAN_LEARNING] = { .type = NLA_U8 }, @@ -1669,58 +2111,132 @@ static void vxlan_del_work(struct work_struct *work) kfree_rcu(vs, rcu); } -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, - vxlan_rcv_t *rcv, void *data) +#if IS_ENABLED(CONFIG_IPV6) +/* Create UDP socket for encapsulation receive. AF_INET6 socket + * could be used for both IPv4 and IPv6 communications, but + * users may set bindv6only=1. + */ +static int create_v6_sock(struct net *net, __be16 port, struct socket **psock) +{ + struct sock *sk; + struct socket *sock; + struct sockaddr_in6 vxlan_addr = { + .sin6_family = AF_INET6, + .sin6_port = port, + }; + int rc, val = 1; + + rc = sock_create_kern(AF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (rc < 0) { + pr_debug("UDPv6 socket create failed\n"); + return rc; + } + + /* Put in proper namespace */ + sk = sock->sk; + sk_change_net(sk, net); + + kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, + (char *)&val, sizeof(val)); + rc = kernel_bind(sock, (struct sockaddr *)&vxlan_addr, + sizeof(struct sockaddr_in6)); + if (rc < 0) { + pr_debug("bind for UDPv6 socket %pI6:%u (%d)\n", + &vxlan_addr.sin6_addr, ntohs(vxlan_addr.sin6_port), rc); + sk_release_kernel(sk); + return rc; + } + /* At this point, IPv6 module should have been loaded in + * sock_create_kern(). + */ + BUG_ON(!ipv6_stub); + + *psock = sock; + /* Disable multicast loopback */ + inet_sk(sk)->mc_loop = 0; + return 0; +} + +#else + +static int create_v6_sock(struct net *net, __be16 port, struct socket **psock) +{ + return -EPFNOSUPPORT; +} +#endif + +static int create_v4_sock(struct net *net, __be16 port, struct socket **psock) { - struct vxlan_net *vn = net_generic(net, vxlan_net_id); - struct vxlan_sock *vs; struct sock *sk; + struct socket *sock; struct sockaddr_in vxlan_addr = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = port, }; int rc; - unsigned int h; - - vs = kmalloc(sizeof(*vs), GFP_KERNEL); - if (!vs) { - pr_debug("memory alocation failure\n"); - return ERR_PTR(-ENOMEM); - } - - for (h = 0; h < VNI_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&vs->vni_list[h]); - - INIT_WORK(&vs->del_work, vxlan_del_work); /* Create UDP socket for encapsulation receive. */ - rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vs->sock); + rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (rc < 0) { pr_debug("UDP socket create failed\n"); - kfree(vs); - return ERR_PTR(rc); + return rc; } /* Put in proper namespace */ - sk = vs->sock->sk; + sk = sock->sk; sk_change_net(sk, net); - rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr, + rc = kernel_bind(sock, (struct sockaddr *) &vxlan_addr, sizeof(vxlan_addr)); if (rc < 0) { pr_debug("bind for UDP socket %pI4:%u (%d)\n", &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc); sk_release_kernel(sk); + return rc; + } + + *psock = sock; + /* Disable multicast loopback */ + inet_sk(sk)->mc_loop = 0; + return 0; +} + +/* Create new listen socket if needed */ +static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, + vxlan_rcv_t *rcv, void *data, bool ipv6) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct vxlan_sock *vs; + struct socket *sock; + struct sock *sk; + int rc = 0; + unsigned int h; + + vs = kmalloc(sizeof(*vs), GFP_KERNEL); + if (!vs) + return ERR_PTR(-ENOMEM); + + for (h = 0; h < VNI_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&vs->vni_list[h]); + + INIT_WORK(&vs->del_work, vxlan_del_work); + + if (ipv6) + rc = create_v6_sock(net, port, &sock); + else + rc = create_v4_sock(net, port, &sock); + if (rc < 0) { kfree(vs); return ERR_PTR(rc); } + + vs->sock = sock; + sk = sock->sk; atomic_set(&vs->refcnt, 1); vs->rcv = rcv; vs->data = data; - /* Disable multicast loopback */ - inet_sk(sk)->mc_loop = 0; spin_lock(&vn->sock_lock); hlist_add_head_rcu(&vs->hlist, vs_head(net, port)); spin_unlock(&vn->sock_lock); @@ -1728,18 +2244,24 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, /* Mark socket as an encapsulation socket. */ udp_sk(sk)->encap_type = 1; udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; - udp_encap_enable(); +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6) + ipv6_stub->udpv6_encap_enable(); + else +#endif + udp_encap_enable(); + return vs; } struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, - bool no_share) + bool no_share, bool ipv6) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_sock *vs; - vs = vxlan_socket_create(net, port, rcv, data); + vs = vxlan_socket_create(net, port, rcv, data, ipv6); if (!IS_ERR(vs)) return vs; @@ -1772,7 +2294,7 @@ static void vxlan_sock_work(struct work_struct *work) __be16 port = vxlan->dst_port; struct vxlan_sock *nvs; - nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false); + nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags & VXLAN_F_IPV6); spin_lock(&vn->sock_lock); if (!IS_ERR(nvs)) vxlan_vs_add_dev(nvs, vxlan); @@ -1789,6 +2311,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, struct vxlan_rdst *dst = &vxlan->default_dst; __u32 vni; int err; + bool use_ipv6 = false; if (!data[IFLA_VXLAN_ID]) return -EINVAL; @@ -1796,11 +2319,32 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, vni = nla_get_u32(data[IFLA_VXLAN_ID]); dst->remote_vni = vni; - if (data[IFLA_VXLAN_GROUP]) - dst->remote_ip = nla_get_be32(data[IFLA_VXLAN_GROUP]); + if (data[IFLA_VXLAN_GROUP]) { + dst->remote_ip.sin.sin_addr.s_addr = nla_get_be32(data[IFLA_VXLAN_GROUP]); + dst->remote_ip.sa.sa_family = AF_INET; + } else if (data[IFLA_VXLAN_GROUP6]) { + if (!IS_ENABLED(CONFIG_IPV6)) + return -EPFNOSUPPORT; + + nla_memcpy(&dst->remote_ip.sin6.sin6_addr, data[IFLA_VXLAN_GROUP6], + sizeof(struct in6_addr)); + dst->remote_ip.sa.sa_family = AF_INET6; + use_ipv6 = true; + } - if (data[IFLA_VXLAN_LOCAL]) - vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]); + if (data[IFLA_VXLAN_LOCAL]) { + vxlan->saddr.sin.sin_addr.s_addr = nla_get_be32(data[IFLA_VXLAN_LOCAL]); + vxlan->saddr.sa.sa_family = AF_INET; + } else if (data[IFLA_VXLAN_LOCAL6]) { + if (!IS_ENABLED(CONFIG_IPV6)) + return -EPFNOSUPPORT; + + /* TODO: respect scope id */ + nla_memcpy(&vxlan->saddr.sin6.sin6_addr, data[IFLA_VXLAN_LOCAL6], + sizeof(struct in6_addr)); + vxlan->saddr.sa.sa_family = AF_INET6; + use_ipv6 = true; + } if (data[IFLA_VXLAN_LINK] && (dst->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]))) { @@ -1812,12 +2356,23 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, return -ENODEV; } +#if IS_ENABLED(CONFIG_IPV6) + if (use_ipv6) { + struct inet6_dev *idev = __in6_dev_get(lowerdev); + if (idev && idev->cnf.disable_ipv6) { + pr_info("IPv6 is disabled via sysctl\n"); + return -EPERM; + } + vxlan->flags |= VXLAN_F_IPV6; + } +#endif + if (!tb[IFLA_MTU]) - dev->mtu = lowerdev->mtu - VXLAN_HEADROOM; + dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); /* update header length based on lower device */ dev->hard_header_len = lowerdev->hard_header_len + - VXLAN_HEADROOM; + (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); } if (data[IFLA_VXLAN_TOS]) @@ -1868,7 +2423,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, /* create an fdb entry for default destination */ err = vxlan_fdb_create(vxlan, all_zeros_mac, - vxlan->default_dst.remote_ip, + &vxlan->default_dst.remote_ip, NUD_REACHABLE|NUD_PERMANENT, NLM_F_EXCL|NLM_F_CREATE, vxlan->dst_port, vxlan->default_dst.remote_vni, @@ -1905,9 +2460,9 @@ static size_t vxlan_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */ - nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */ + nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ - nla_total_size(sizeof(__be32))+ /* IFLA_VXLAN_LOCAL */ + nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ @@ -1934,14 +2489,36 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni)) goto nla_put_failure; - if (dst->remote_ip && nla_put_be32(skb, IFLA_VXLAN_GROUP, dst->remote_ip)) - goto nla_put_failure; + if (!vxlan_addr_any(&dst->remote_ip)) { + if (dst->remote_ip.sa.sa_family == AF_INET) { + if (nla_put_be32(skb, IFLA_VXLAN_GROUP, + dst->remote_ip.sin.sin_addr.s_addr)) + goto nla_put_failure; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (nla_put(skb, IFLA_VXLAN_GROUP6, sizeof(struct in6_addr), + &dst->remote_ip.sin6.sin6_addr)) + goto nla_put_failure; +#endif + } + } if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex)) goto nla_put_failure; - if (vxlan->saddr && nla_put_be32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr)) - goto nla_put_failure; + if (!vxlan_addr_any(&vxlan->saddr)) { + if (vxlan->saddr.sa.sa_family == AF_INET) { + if (nla_put_be32(skb, IFLA_VXLAN_LOCAL, + vxlan->saddr.sin.sin_addr.s_addr)) + goto nla_put_failure; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (nla_put(skb, IFLA_VXLAN_LOCAL6, sizeof(struct in6_addr), + &vxlan->saddr.sin6.sin6_addr)) + goto nla_put_failure; +#endif + } + } if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) || nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->tos) || diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index d0adbaf86186..7fe19648f10e 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -2693,7 +2693,7 @@ static struct net_device *init_wifidev(struct airo_info *ai, dev->base_addr = ethdev->base_addr; dev->wireless_data = ethdev->wireless_data; SET_NETDEV_DEV(dev, ethdev->dev.parent); - memcpy(dev->dev_addr, ethdev->dev_addr, dev->addr_len); + eth_hw_addr_inherit(dev, ethdev); err = register_netdev(dev); if (err<0) { free_netdev(dev); diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c index f7c70b3a6ea9..c51d2dc489e4 100644 --- a/drivers/net/wireless/b43/dma.c +++ b/drivers/net/wireless/b43/dma.c @@ -431,9 +431,9 @@ static int alloc_ringmemory(struct b43_dmaring *ring) u16 ring_mem_size = (ring->type == B43_DMA_64BIT) ? B43_DMA64_RINGMEMSIZE : B43_DMA32_RINGMEMSIZE; - ring->descbase = dma_alloc_coherent(ring->dev->dev->dma_dev, - ring_mem_size, &(ring->dmabase), - GFP_KERNEL | __GFP_ZERO); + ring->descbase = dma_zalloc_coherent(ring->dev->dev->dma_dev, + ring_mem_size, &(ring->dmabase), + GFP_KERNEL); if (!ring->descbase) return -ENOMEM; diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c index faeafe219c57..42eb26c99e11 100644 --- a/drivers/net/wireless/b43legacy/dma.c +++ b/drivers/net/wireless/b43legacy/dma.c @@ -331,10 +331,9 @@ void free_descriptor_buffer(struct b43legacy_dmaring *ring, static int alloc_ringmemory(struct b43legacy_dmaring *ring) { /* GFP flags must match the flags in free_ringmemory()! */ - ring->descbase = dma_alloc_coherent(ring->dev->dev->dma_dev, - B43legacy_DMA_RINGMEMSIZE, - &(ring->dmabase), - GFP_KERNEL | __GFP_ZERO); + ring->descbase = dma_zalloc_coherent(ring->dev->dev->dma_dev, + B43legacy_DMA_RINGMEMSIZE, + &(ring->dmabase), GFP_KERNEL); if (!ring->descbase) return -ENOMEM; diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index 6307a4e36c85..c275dc1623fe 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -1425,7 +1425,7 @@ static int prism2_hw_init2(struct net_device *dev, int initial) } list_for_each(ptr, &local->hostap_interfaces) { iface = list_entry(ptr, struct hostap_interface, list); - memcpy(iface->dev->dev_addr, dev->dev_addr, ETH_ALEN); + eth_hw_addr_inherit(iface->dev, dev); } } else if (local->fw_ap) prism2_check_sta_fw_version(local); diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index e4f56ad26cd8..a1257c92afc4 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -66,7 +66,7 @@ struct net_device * hostap_add_interface(struct local_info *local, list_add(&iface->list, &local->hostap_interfaces); mdev = local->dev; - memcpy(dev->dev_addr, mdev->dev_addr, ETH_ALEN); + eth_hw_addr_inherit(dev, mdev); dev->base_addr = mdev->base_addr; dev->irq = mdev->irq; dev->mem_start = mdev->mem_start; diff --git a/drivers/net/wireless/libertas/mesh.c b/drivers/net/wireless/libertas/mesh.c index efae07e05c80..6fef746345bc 100644 --- a/drivers/net/wireless/libertas/mesh.c +++ b/drivers/net/wireless/libertas/mesh.c @@ -1017,7 +1017,7 @@ static int lbs_add_mesh(struct lbs_private *priv) mesh_dev->netdev_ops = &mesh_netdev_ops; mesh_dev->ethtool_ops = &lbs_ethtool_ops; - memcpy(mesh_dev->dev_addr, priv->dev->dev_addr, ETH_ALEN); + eth_hw_addr_inherit(mesh_dev, priv->dev); SET_NETDEV_DEV(priv->mesh_dev, priv->dev->dev.parent); diff --git a/drivers/staging/vt6655/hostap.c b/drivers/staging/vt6655/hostap.c index 57a08c5771f2..8acff44a9e75 100644 --- a/drivers/staging/vt6655/hostap.c +++ b/drivers/staging/vt6655/hostap.c @@ -86,7 +86,7 @@ static int hostap_enable_hostapd(PSDevice pDevice, int rtnl_locked) apdev_priv = netdev_priv(pDevice->apdev); *apdev_priv = *pDevice; - memcpy(pDevice->apdev->dev_addr, dev->dev_addr, ETH_ALEN); + eth_hw_addr_inherit(pDevice->apdev, dev); pDevice->apdev->netdev_ops = &apdev_netdev_ops; diff --git a/drivers/staging/vt6655/ioctl.c b/drivers/staging/vt6655/ioctl.c index 46e0e41e7e60..b5cd2e44e53d 100644 --- a/drivers/staging/vt6655/ioctl.c +++ b/drivers/staging/vt6655/ioctl.c @@ -460,7 +460,7 @@ int private_ioctl(PSDevice pDevice, struct ifreq *rq) } if (sValue.dwValue == 1) { DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO "up wpadev\n"); - memcpy(pDevice->wpadev->dev_addr, pDevice->dev->dev_addr, ETH_ALEN); + eth_hw_addr_inherit(pDevice->wpadev, pDevice->dev); pDevice->bWPADEVUp = true; } else { DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO "close wpadev\n"); diff --git a/drivers/staging/vt6655/wpactl.c b/drivers/staging/vt6655/wpactl.c index 869f62c678e6..e8d9ecd2913a 100644 --- a/drivers/staging/vt6655/wpactl.c +++ b/drivers/staging/vt6655/wpactl.c @@ -96,7 +96,7 @@ static int wpa_init_wpadev(PSDevice pDevice) wpadev_priv = netdev_priv(pDevice->wpadev); *wpadev_priv = *pDevice; - memcpy(pDevice->wpadev->dev_addr, dev->dev_addr, ETH_ALEN); + eth_hw_addr_inherit(pDevice->wpadev, dev); pDevice->wpadev->base_addr = dev->base_addr; pDevice->wpadev->irq = dev->irq; pDevice->wpadev->mem_start = dev->mem_start; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 94af41858513..3a8d0a2af607 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -132,9 +132,8 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) static inline void *dma_zalloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - void *ret = dma_alloc_coherent(dev, size, dma_handle, flag); - if (ret) - memset(ret, 0, size); + void *ret = dma_alloc_coherent(dev, size, dma_handle, + flag | __GFP_ZERO); return ret; } diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index c623861964e4..d8b512496e50 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -199,6 +199,21 @@ static inline void eth_hw_addr_random(struct net_device *dev) } /** + * eth_hw_addr_inherit - Copy dev_addr from another net_device + * @dst: pointer to net_device to copy dev_addr to + * @src: pointer to net_device to copy dev_addr from + * + * Copy the Ethernet address from one net_device to another along with + * the address attributes (addr_assign_type). + */ +static inline void eth_hw_addr_inherit(struct net_device *dst, + struct net_device *src) +{ + dst->addr_assign_type = src->addr_assign_type; + memcpy(dst->dev_addr, src->dev_addr, ETH_ALEN); +} + +/** * compare_ether_addr - Compare two Ethernet addresses * @addr1: Pointer to a six-byte array containing the Ethernet address * @addr2: Pointer other six-byte array containing the Ethernet address diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 9ac5047062c8..28ea38439313 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -50,6 +50,7 @@ struct ipv6_devconf { __s32 accept_dad; __s32 force_tllao; __s32 ndisc_notify; + __s32 suppress_frag_ndisc; void *sysctl; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 077363dcd860..3ad49b833eab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1125,6 +1125,7 @@ struct net_device { struct list_head napi_list; struct list_head unreg_list; struct list_head upper_dev_list; /* List of upper devices */ + struct list_head lower_dev_list; /* currently active device features */ @@ -2767,6 +2768,16 @@ extern int bpf_jit_enable; extern bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); extern bool netdev_has_any_upper_dev(struct net_device *dev); +extern struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter); + +/* iterate through upper list, must be called under RCU read lock */ +#define netdev_for_each_upper_dev_rcu(dev, upper, iter) \ + for (iter = &(dev)->upper_dev_list, \ + upper = netdev_upper_get_next_dev_rcu(dev, &(iter)); \ + upper; \ + upper = netdev_upper_get_next_dev_rcu(dev, &(iter))) + extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev); extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); extern int netdev_upper_dev_link(struct net_device *dev, diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 9e495d31516e..bb5deb0feb6b 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -108,6 +108,7 @@ struct plat_stmmacenet_data { int bugged_jumbo; int pmt; int force_sf_dma_mode; + int force_thresh_dma_mode; int riwt_off; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 43fa31a610b8..fb314de2b61b 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -141,6 +141,26 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, const struct in6_addr *src_addr); void ipv6_mc_dad_complete(struct inet6_dev *idev); + +/* A stub used by vxlan module. This is ugly, ideally these + * symbols should be built into the core kernel. + */ +struct ipv6_stub { + int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex, + const struct in6_addr *addr); + int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, + const struct in6_addr *addr); + int (*ipv6_dst_lookup)(struct sock *sk, struct dst_entry **dst, + struct flowi6 *fl6); + void (*udpv6_encap_enable)(void); + void (*ndisc_send_na)(struct net_device *dev, struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *solicited_addr, + bool router, bool solicited, bool override, bool inc_opt); + struct neigh_table *nd_tbl; +}; +extern const struct ipv6_stub *ipv6_stub __read_mostly; + /* * identify MLD packets for MLD filter exceptions */ diff --git a/include/net/ip.h b/include/net/ip.h index a68f838a132c..48f55979d842 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -194,7 +194,17 @@ static inline u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp } #endif extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align); -extern void snmp_mib_free(void __percpu *ptr[2]); + +static inline void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ]) +{ + int i; + + BUG_ON(ptr == NULL); + for (i = 0; i < SNMP_ARRAY_SZ; i++) { + free_percpu(ptr[i]); + ptr[i] = NULL; + } +} extern struct local_ports { seqlock_t lock; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f667248202b6..f525e7038cca 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -112,8 +112,6 @@ extern struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast); -extern int ip6_dst_hoplimit(struct dst_entry *dst); - /* * support functions for ND * diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 7bdff0455065..bbf1c8fb8511 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -658,6 +658,8 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add extern void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); +extern int ip6_dst_hoplimit(struct dst_entry *dst); + /* * Header manipulation */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 6fea32340ae8..3c4211f0bed6 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -204,6 +204,11 @@ extern void ndisc_send_ns(struct net_device *dev, extern void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); +extern void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *solicited_addr, + bool router, bool solicited, bool override, + bool inc_opt); extern void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target); diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f7c24f8fbdc5..59ec3cd15d68 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -85,6 +85,9 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, int register_qdisc(struct Qdisc_ops *qops); int unregister_qdisc(struct Qdisc_ops *qops); +void qdisc_get_default(char *id, size_t len); +int qdisc_set_default(const char *id); + void qdisc_list_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 76368c9d4503..f4eb365f7dcd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -316,6 +316,7 @@ extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; extern struct Qdisc_ops mq_qdisc_ops; +extern const struct Qdisc_ops *default_qdisc_ops; struct Qdisc_class_common { u32 classid; @@ -369,9 +370,9 @@ void qdisc_reset(struct Qdisc *qdisc); void qdisc_destroy(struct Qdisc *qdisc); void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, - struct Qdisc_ops *ops); + const struct Qdisc_ops *ops); struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, - struct Qdisc_ops *ops, u32 parentid); + const struct Qdisc_ops *ops, u32 parentid); void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); void tcf_destroy(struct tcf_proto *tp); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 422db6cc3214..2174d8da0770 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -113,29 +113,27 @@ struct sctp_hashbucket { /* The SCTP globals structure. */ extern struct sctp_globals { - /* The following variables are implementation specific. */ - - /* Default initialization values to be applied to new associations. */ - __u16 max_instreams; - __u16 max_outstreams; - /* This is a list of groups of functions for each address * family that we support. */ struct list_head address_families; /* This is the hash of all endpoints. */ - int ep_hashsize; struct sctp_hashbucket *ep_hashtable; - /* This is the hash of all associations. */ - int assoc_hashsize; struct sctp_hashbucket *assoc_hashtable; - /* This is the sctp port control hash. */ - int port_hashsize; struct sctp_bind_hashbucket *port_hashtable; + /* Sizes of above hashtables. */ + int ep_hashsize; + int assoc_hashsize; + int port_hashsize; + + /* Default initialization values to be applied to new associations. */ + __u16 max_instreams; + __u16 max_outstreams; + /* Flag to indicate whether computing and verifying checksum * is disabled. */ bool checksum_disable; diff --git a/include/net/sock.h b/include/net/sock.h index e4bbcbfd07ea..6ba2e7b0e2b1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -232,6 +232,7 @@ struct cg_proto; * @sk_napi_id: id of the last napi context to receive data for sk * @sk_ll_usec: usecs to busypoll when there is no data * @sk_allocation: allocation mode + * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) * @sk_sndbuf: size of send buffer in bytes * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings @@ -361,6 +362,7 @@ struct sock { kmemcheck_bitfield_end(flags); int sk_wmem_queued; gfp_t sk_allocation; + u32 sk_pacing_rate; /* bytes per second */ netdev_features_t sk_route_caps; netdev_features_t sk_route_nocaps; int sk_gso_type; diff --git a/include/net/tcp.h b/include/net/tcp.h index dd5e16f66f84..6a6a88db462d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -281,6 +281,7 @@ extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; extern unsigned int sysctl_tcp_notsent_lowat; +extern int sysctl_tcp_min_tso_segs; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index ad342e3688a0..d2b88cafa7a2 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -25,7 +25,7 @@ struct vxlan_sock { struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, - bool no_share); + bool no_share, bool ipv6); void vxlan_sock_release(struct vxlan_sock *vs); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 04c0e7a5d484..80394e8dc3a3 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -314,6 +314,8 @@ enum { IFLA_VXLAN_L2MISS, IFLA_VXLAN_L3MISS, IFLA_VXLAN_PORT, /* destination port */ + IFLA_VXLAN_GROUP6, + IFLA_VXLAN_LOCAL6, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index b950c02030c0..dbf06667394b 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -56,6 +56,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_LB 1 #define PACKET_FANOUT_CPU 2 #define PACKET_FANOUT_ROLLOVER 3 +#define PACKET_FANOUT_RND 4 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index d07ac6903e59..593b0e32d956 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -162,6 +162,7 @@ enum { DEVCONF_NDISC_NOTIFY, DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL, DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL, + DEVCONF_SUPPRESS_FRAG_NDISC, DEVCONF_MAX }; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 09d62b9228ff..9b829134d422 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -744,4 +744,45 @@ struct tc_fq_codel_xstats { }; }; +/* FQ */ + +enum { + TCA_FQ_UNSPEC, + + TCA_FQ_PLIMIT, /* limit of total number of packets in queue */ + + TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */ + + TCA_FQ_QUANTUM, /* RR quantum */ + + TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */ + + TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ + + TCA_FQ_FLOW_DEFAULT_RATE,/* for sockets with unspecified sk_rate, + * use the following rate + */ + + TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ + + TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + __TCA_FQ_MAX +}; + +#define TCA_FQ_MAX (__TCA_FQ_MAX - 1) + +struct tc_fq_qd_stats { + __u64 gc_flows; + __u64 highprio_packets; + __u64 tcp_retrans; + __u64 throttled; + __u64 flows_plimit; + __u64 pkts_too_long; + __u64 allocation_errors; + __s64 time_next_delayed_flow; + __u32 flows; + __u32 inactive_flows; + __u32 throttled_flows; + __u32 pad; +}; #endif diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9ab8a7ed99c0..09bf1c38805b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -582,7 +582,7 @@ static int vlan_dev_init(struct net_device *dev) dev->dev_id = real_dev->dev_id; if (is_zero_ether_addr(dev->dev_addr)) - memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len); + eth_hw_addr_inherit(dev, real_dev); if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index aa6c9a8ba32a..c41d5fbb91d0 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -383,6 +383,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) netdev_update_features(br->dev); + if (br->dev->needed_headroom < dev->needed_headroom) + br->dev->needed_headroom = dev->needed_headroom; + spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); diff --git a/net/core/dev.c b/net/core/dev.c index 1ed2b66a10a6..6fbb0c90849b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4367,57 +4367,48 @@ softnet_break: goto out; } -struct netdev_upper { +struct netdev_adjacent { struct net_device *dev; + + /* upper master flag, there can only be one master device per list */ bool master; + + /* indicates that this dev is our first-level lower/upper device */ + bool neighbour; + + /* counter for the number of times this device was added to us */ + u16 ref_nr; + struct list_head list; struct rcu_head rcu; - struct list_head search_list; }; -static void __append_search_uppers(struct list_head *search_list, - struct net_device *dev) +static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, + struct net_device *adj_dev, + bool upper) { - struct netdev_upper *upper; + struct netdev_adjacent *adj; + struct list_head *dev_list; - list_for_each_entry(upper, &dev->upper_dev_list, list) { - /* check if this upper is not already in search list */ - if (list_empty(&upper->search_list)) - list_add_tail(&upper->search_list, search_list); + dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list; + + list_for_each_entry(adj, dev_list, list) { + if (adj->dev == adj_dev) + return adj; } + return NULL; } -static bool __netdev_search_upper_dev(struct net_device *dev, - struct net_device *upper_dev) +static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev, + struct net_device *udev) { - LIST_HEAD(search_list); - struct netdev_upper *upper; - struct netdev_upper *tmp; - bool ret = false; - - __append_search_uppers(&search_list, dev); - list_for_each_entry(upper, &search_list, search_list) { - if (upper->dev == upper_dev) { - ret = true; - break; - } - __append_search_uppers(&search_list, upper->dev); - } - list_for_each_entry_safe(upper, tmp, &search_list, search_list) - INIT_LIST_HEAD(&upper->search_list); - return ret; + return __netdev_find_adj(dev, udev, true); } -static struct netdev_upper *__netdev_find_upper(struct net_device *dev, - struct net_device *upper_dev) +static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev, + struct net_device *ldev) { - struct netdev_upper *upper; - - list_for_each_entry(upper, &dev->upper_dev_list, list) { - if (upper->dev == upper_dev) - return upper; - } - return NULL; + return __netdev_find_adj(dev, ldev, false); } /** @@ -4462,7 +4453,7 @@ EXPORT_SYMBOL(netdev_has_any_upper_dev); */ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) { - struct netdev_upper *upper; + struct netdev_adjacent *upper; ASSERT_RTNL(); @@ -4470,13 +4461,38 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) return NULL; upper = list_first_entry(&dev->upper_dev_list, - struct netdev_upper, list); + struct netdev_adjacent, list); if (likely(upper->master)) return upper->dev; return NULL; } EXPORT_SYMBOL(netdev_master_upper_dev_get); +/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next device from the dev's upper list, starting from iter + * position. The caller must hold RCU read lock. + */ +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *upper; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&upper->list == &dev->upper_dev_list) + return NULL; + + *iter = &upper->list; + + return upper->dev; +} +EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); + /** * netdev_master_upper_dev_get_rcu - Get master upper device * @dev: device @@ -4486,20 +4502,158 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get); */ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) { - struct netdev_upper *upper; + struct netdev_adjacent *upper; upper = list_first_or_null_rcu(&dev->upper_dev_list, - struct netdev_upper, list); + struct netdev_adjacent, list); if (upper && likely(upper->master)) return upper->dev; return NULL; } EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); +static int __netdev_adjacent_dev_insert(struct net_device *dev, + struct net_device *adj_dev, + bool neighbour, bool master, + bool upper) +{ + struct netdev_adjacent *adj; + + adj = __netdev_find_adj(dev, adj_dev, upper); + + if (adj) { + BUG_ON(neighbour); + adj->ref_nr++; + return 0; + } + + adj = kmalloc(sizeof(*adj), GFP_KERNEL); + if (!adj) + return -ENOMEM; + + adj->dev = adj_dev; + adj->master = master; + adj->neighbour = neighbour; + adj->ref_nr = 1; + + dev_hold(adj_dev); + pr_debug("dev_hold for %s, because of %s link added from %s to %s\n", + adj_dev->name, upper ? "upper" : "lower", dev->name, + adj_dev->name); + + if (!upper) { + list_add_tail_rcu(&adj->list, &dev->lower_dev_list); + return 0; + } + + /* Ensure that master upper link is always the first item in list. */ + if (master) + list_add_rcu(&adj->list, &dev->upper_dev_list); + else + list_add_tail_rcu(&adj->list, &dev->upper_dev_list); + + return 0; +} + +static inline int __netdev_upper_dev_insert(struct net_device *dev, + struct net_device *udev, + bool master, bool neighbour) +{ + return __netdev_adjacent_dev_insert(dev, udev, neighbour, master, + true); +} + +static inline int __netdev_lower_dev_insert(struct net_device *dev, + struct net_device *ldev, + bool neighbour) +{ + return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false, + false); +} + +void __netdev_adjacent_dev_remove(struct net_device *dev, + struct net_device *adj_dev, bool upper) +{ + struct netdev_adjacent *adj; + + if (upper) + adj = __netdev_find_upper(dev, adj_dev); + else + adj = __netdev_find_lower(dev, adj_dev); + + if (!adj) + BUG(); + + if (adj->ref_nr > 1) { + adj->ref_nr--; + return; + } + + list_del_rcu(&adj->list); + pr_debug("dev_put for %s, because of %s link removed from %s to %s\n", + adj_dev->name, upper ? "upper" : "lower", dev->name, + adj_dev->name); + dev_put(adj_dev); + kfree_rcu(adj, rcu); +} + +static inline void __netdev_upper_dev_remove(struct net_device *dev, + struct net_device *udev) +{ + return __netdev_adjacent_dev_remove(dev, udev, true); +} + +static inline void __netdev_lower_dev_remove(struct net_device *dev, + struct net_device *ldev) +{ + return __netdev_adjacent_dev_remove(dev, ldev, false); +} + +int __netdev_adjacent_dev_insert_link(struct net_device *dev, + struct net_device *upper_dev, + bool master, bool neighbour) +{ + int ret; + + ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour); + if (ret) + return ret; + + ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour); + if (ret) { + __netdev_upper_dev_remove(dev, upper_dev); + return ret; + } + + return 0; +} + +static inline int __netdev_adjacent_dev_link(struct net_device *dev, + struct net_device *udev) +{ + return __netdev_adjacent_dev_insert_link(dev, udev, false, false); +} + +static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, + struct net_device *udev, + bool master) +{ + return __netdev_adjacent_dev_insert_link(dev, udev, master, true); +} + +void __netdev_adjacent_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) +{ + __netdev_upper_dev_remove(dev, upper_dev); + __netdev_lower_dev_remove(upper_dev, dev); +} + + static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master) { - struct netdev_upper *upper; + struct netdev_adjacent *i, *j, *to_i, *to_j; + int ret = 0; ASSERT_RTNL(); @@ -4507,7 +4661,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_search_upper_dev(upper_dev, dev)) + if (__netdev_find_upper(upper_dev, dev)) return -EBUSY; if (__netdev_find_upper(dev, upper_dev)) @@ -4516,22 +4670,76 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; - upper = kmalloc(sizeof(*upper), GFP_KERNEL); - if (!upper) - return -ENOMEM; + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master); + if (ret) + return ret; - upper->dev = upper_dev; - upper->master = master; - INIT_LIST_HEAD(&upper->search_list); + /* Now that we linked these devs, make all the upper_dev's + * upper_dev_list visible to every dev's lower_dev_list and vice + * versa, and don't forget the devices itself. All of these + * links are non-neighbours. + */ + list_for_each_entry(i, &upper_dev->upper_dev_list, list) { + list_for_each_entry(j, &dev->lower_dev_list, list) { + ret = __netdev_adjacent_dev_link(i->dev, j->dev); + if (ret) + goto rollback_mesh; + } + } + + /* add dev to every upper_dev's upper device */ + list_for_each_entry(i, &upper_dev->upper_dev_list, list) { + ret = __netdev_adjacent_dev_link(dev, i->dev); + if (ret) + goto rollback_upper_mesh; + } + + /* add upper_dev to every dev's lower device */ + list_for_each_entry(i, &dev->lower_dev_list, list) { + ret = __netdev_adjacent_dev_link(i->dev, upper_dev); + if (ret) + goto rollback_lower_mesh; + } - /* Ensure that master upper link is always the first item in list. */ - if (master) - list_add_rcu(&upper->list, &dev->upper_dev_list); - else - list_add_tail_rcu(&upper->list, &dev->upper_dev_list); - dev_hold(upper_dev); call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); return 0; + +rollback_lower_mesh: + to_i = i; + list_for_each_entry(i, &dev->lower_dev_list, list) { + if (i == to_i) + break; + __netdev_adjacent_dev_unlink(i->dev, upper_dev); + } + + i = NULL; + +rollback_upper_mesh: + to_i = i; + list_for_each_entry(i, &upper_dev->upper_dev_list, list) { + if (i == to_i) + break; + __netdev_adjacent_dev_unlink(dev, i->dev); + } + + i = j = NULL; + +rollback_mesh: + to_i = i; + to_j = j; + list_for_each_entry(i, &dev->lower_dev_list, list) { + list_for_each_entry(j, &upper_dev->upper_dev_list, list) { + if (i == to_i && j == to_j) + break; + __netdev_adjacent_dev_unlink(i->dev, j->dev); + } + if (i == to_i) + break; + } + + __netdev_adjacent_dev_unlink(dev, upper_dev); + + return ret; } /** @@ -4580,16 +4788,28 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { - struct netdev_upper *upper; - + struct netdev_adjacent *i, *j; ASSERT_RTNL(); - upper = __netdev_find_upper(dev, upper_dev); - if (!upper) - return; - list_del_rcu(&upper->list); - dev_put(upper_dev); - kfree_rcu(upper, rcu); + __netdev_adjacent_dev_unlink(dev, upper_dev); + + /* Here is the tricky part. We must remove all dev's lower + * devices from all upper_dev's upper devices and vice + * versa, to maintain the graph relationship. + */ + list_for_each_entry(i, &dev->lower_dev_list, list) + list_for_each_entry(j, &upper_dev->upper_dev_list, list) + __netdev_adjacent_dev_unlink(i->dev, j->dev); + + /* remove also the devices itself from lower/upper device + * list + */ + list_for_each_entry(i, &dev->lower_dev_list, list) + __netdev_adjacent_dev_unlink(i->dev, upper_dev); + + list_for_each_entry(i, &upper_dev->upper_dev_list, list) + __netdev_adjacent_dev_unlink(dev, i->dev); + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -5850,6 +6070,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->unreg_list); INIT_LIST_HEAD(&dev->link_watch_list); INIT_LIST_HEAD(&dev->upper_dev_list); + INIT_LIST_HEAD(&dev->lower_dev_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 60533db8b72d..6072610a8672 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2759,13 +2759,11 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } -#ifdef CONFIG_ARPD void neigh_app_ns(struct neighbour *n) { __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); } EXPORT_SYMBOL(neigh_app_ns); -#endif /* CONFIG_ARPD */ #ifdef CONFIG_SYSCTL static int zero; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 31107abd2783..cca444190907 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -20,6 +20,7 @@ #include <net/sock.h> #include <net/net_ratelimit.h> #include <net/busy_poll.h> +#include <net/pkt_sched.h> static int zero = 0; static int one = 1; @@ -193,6 +194,26 @@ static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, } #endif /* CONFIG_NET_FLOW_LIMIT */ +#ifdef CONFIG_NET_SCHED +static int set_default_qdisc(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char id[IFNAMSIZ]; + struct ctl_table tbl = { + .data = id, + .maxlen = IFNAMSIZ, + }; + int ret; + + qdisc_get_default(id, IFNAMSIZ); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) + ret = qdisc_set_default(id); + return ret; +} +#endif + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -315,7 +336,14 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, -# +#endif +#ifdef CONFIG_NET_SCHED + { + .procname = "default_qdisc", + .mode = 0644, + .maxlen = IFNAMSIZ, + .proc_handler = set_default_qdisc + }, #endif #endif /* CONFIG_NET */ { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6ebd8fbd9285..29d684ebca6a 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -347,7 +347,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->features = master->vlan_features; SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); - memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); + eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; switch (ds->dst->tag_protocol) { diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 37cf1a6ea3ad..05c57f0fcabe 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -259,22 +259,6 @@ config IP_PIMSM_V2 gated-5). This routing protocol is not used widely, so say N unless you want to play with it. -config ARPD - bool "IP: ARP daemon support" - ---help--- - The kernel maintains an internal cache which maps IP addresses to - hardware addresses on the local network, so that Ethernet - frames are sent to the proper address on the physical networking - layer. Normally, kernel uses the ARP protocol to resolve these - mappings. - - Saying Y here adds support to have an user space daemon to do this - resolution instead. This is useful for implementing an alternate - address resolution protocol (e.g. NHRP on mGRE tunnels) and also for - testing purposes. - - If unsure, say N. - config SYN_COOKIES bool "IP: TCP syncookie support" ---help--- diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b4d0be2b7ce9..7a1874b7b8fd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1532,18 +1532,6 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) } EXPORT_SYMBOL_GPL(snmp_mib_init); -void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ]) -{ - int i; - - BUG_ON(ptr == NULL); - for (i = 0; i < SNMP_ARRAY_SZ; i++) { - free_percpu(ptr[i]); - ptr[i] = NULL; - } -} -EXPORT_SYMBOL_GPL(snmp_mib_free); - #ifdef CONFIG_IP_MULTICAST static const struct net_protocol igmp_protocol = { .handler = igmp_rcv, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4429b013f269..7808093cede6 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -368,9 +368,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) } else { probes -= neigh->parms->app_probes; if (probes < 0) { -#ifdef CONFIG_ARPD neigh_app_ns(neigh); -#endif return; } } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 8ed7c32ae28e..540279f4c531 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -29,6 +29,7 @@ static int zero; static int one = 1; static int four = 4; +static int gso_max_segs = GSO_MAX_SEGS; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -761,6 +762,15 @@ static struct ctl_table ipv4_table[] = { .extra2 = &four, }, { + .procname = "tcp_min_tso_segs", + .data = &sysctl_tcp_min_tso_segs, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &gso_max_segs, + }, + { .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4e42c03859f4..fdf74090a001 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -283,6 +283,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; +int sysctl_tcp_min_tso_segs __read_mostly = 2; + struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -785,12 +787,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, xmit_size_goal = mss_now; if (large_allowed && sk_can_gso(sk)) { - xmit_size_goal = ((sk->sk_gso_max_size - 1) - - inet_csk(sk)->icsk_af_ops->net_header_len - - inet_csk(sk)->icsk_ext_hdr_len - - tp->tcp_header_len); + u32 gso_size, hlen; + + /* Maybe we should/could use sk->sk_prot->max_header here ? */ + hlen = inet_csk(sk)->icsk_af_ops->net_header_len + + inet_csk(sk)->icsk_ext_hdr_len + + tp->tcp_header_len; + + /* Goal is to send at least one packet per ms, + * not one big TSO packet every 100 ms. + * This preserves ACK clocking and is consistent + * with tcp_tso_should_defer() heuristic. + */ + gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC); + gso_size = max_t(u32, gso_size, + sysctl_tcp_min_tso_segs * mss_now); + + xmit_size_goal = min_t(u32, gso_size, + sk->sk_gso_max_size - 1 - hlen); - /* TSQ : try to have two TSO segments in flight */ + /* TSQ : try to have at least two segments in flight + * (one in NIC TX ring, another in Qdisc) + */ xmit_size_goal = min_t(u32, xmit_size_goal, sysctl_tcp_limit_output_bytes >> 1); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ec492eae0cd7..1a84fffe6993 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -688,6 +688,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) } } +/* Set the sk_pacing_rate to allow proper sizing of TSO packets. + * Note: TCP stack does not yet implement pacing. + * FQ packet scheduler can be used to implement cheap but effective + * TCP pacing, to smooth the burst on large writes when packets + * in flight is significantly lower than cwnd (or rwin) + */ +static void tcp_update_pacing_rate(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + u64 rate; + + /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ + rate = (u64)tp->mss_cache * 2 * (HZ << 3); + + rate *= max(tp->snd_cwnd, tp->packets_out); + + /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3), + * be conservative and assume srtt = 1 (125 us instead of 1.25 ms) + * We probably need usec resolution in the future. + * Note: This also takes care of possible srtt=0 case, + * when tcp_rtt_estimator() was not yet called. + */ + if (tp->srtt > 8 + 2) + do_div(rate, tp->srtt); + + sk->sk_pacing_rate = min_t(u64, rate, ~0U); +} + /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ @@ -3278,7 +3306,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_in_flight; + u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt; u32 prior_fackets; int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; @@ -3383,6 +3411,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); + if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd) + tcp_update_pacing_rate(sk); return 1; no_queue: diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index f6a005c485a9..273ed735cca2 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -443,7 +443,7 @@ void tcp_init_metrics(struct sock *sk) struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_metrics_block *tm; - u32 val; + u32 val, crtt = 0; /* cached RTT scaled by 8 */ if (dst == NULL) goto reset; @@ -478,40 +478,18 @@ void tcp_init_metrics(struct sock *sk) tp->reordering = val; } - val = tcp_metric_get(tm, TCP_METRIC_RTT); - if (val == 0 || tp->srtt == 0) { - rcu_read_unlock(); - goto reset; - } - /* Initial rtt is determined from SYN,SYN-ACK. - * The segment is small and rtt may appear much - * less than real one. Use per-dst memory - * to make it more realistic. - * - * A bit of theory. RTT is time passed after "normal" sized packet - * is sent until it is ACKed. In normal circumstances sending small - * packets force peer to delay ACKs and calculation is correct too. - * The algorithm is adaptive and, provided we follow specs, it - * NEVER underestimate RTT. BUT! If peer tries to make some clever - * tricks sort of "quick acks" for time long enough to decrease RTT - * to low value, and then abruptly stops to do it and starts to delay - * ACKs, wait for troubles. - */ - val = msecs_to_jiffies(val); - if (val > tp->srtt) { - tp->srtt = val; - tp->rtt_seq = tp->snd_nxt; - } - val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); - if (val > tp->mdev) { - tp->mdev = val; - tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); - } + crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); rcu_read_unlock(); - - tcp_set_rto(sk); reset: - if (tp->srtt == 0) { + if (crtt > tp->srtt) { + /* Initial RTT (tp->srtt) from SYN usually don't measure + * serialization delay on low BW links well so RTO may be + * under-estimated. Stay conservative and seed RTO with + * the RTTs from past data exchanges, using the same seeding + * formula in tcp_rtt_estimator(). + */ + inet_csk(sk)->icsk_rto = crtt + max(crtt >> 2, tcp_rto_min(sk)); + } else if (tp->srtt == 0) { /* RFC6298: 5.7 We've failed to get a valid RTT sample from * 3WHS. This is most likely due to retransmission, * including spurious one. Reset the RTO back to 3secs diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 884efff5b531..e63ae4c9691d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1631,7 +1631,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) /* If a full-sized TSO skb can be sent, do it. */ if (limit >= min_t(unsigned int, sk->sk_gso_max_size, - sk->sk_gso_max_segs * tp->mss_cache)) + tp->xmit_size_goal_segs * tp->mss_cache)) goto send_now; /* Middle in queue won't get any more data, full sendable already? */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0b24508bcdc4..74d2c95db57f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2337,7 +2337,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, uh->len = htons(skb->len - udp_offset); /* csum segment if tunnel sets skb with csum. */ - if (unlikely(uh->check)) { + if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) { struct iphdr *iph = ip_hdr(skb); uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, @@ -2348,7 +2348,18 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; + } else if (protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + u32 len = skb->len - udp_offset; + + uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + len, IPPROTO_UDP, 0); + uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + skb->ip_summed = CHECKSUM_NONE; } + skb->protocol = protocol; } while ((skb = skb->next)); out: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2d6d1793bbfe..2a66eaad047f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -204,6 +204,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, .accept_dad = 1, + .suppress_frag_ndisc = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -241,17 +242,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, .accept_dad = 1, + .suppress_frag_ndisc = 1, }; -/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; -const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; -const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; -const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; -const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; -const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; - /* Check if a valid qdisc is available */ static inline bool addrconf_qdisc_ok(const struct net_device *dev) { @@ -311,36 +304,6 @@ err_ip: return -ENOMEM; } -static void snmp6_free_dev(struct inet6_dev *idev) -{ - kfree(idev->stats.icmpv6msgdev); - kfree(idev->stats.icmpv6dev); - snmp_mib_free((void __percpu **)idev->stats.ipv6); -} - -/* Nobody refers to this device, we may destroy it. */ - -void in6_dev_finish_destroy(struct inet6_dev *idev) -{ - struct net_device *dev = idev->dev; - - WARN_ON(!list_empty(&idev->addr_list)); - WARN_ON(idev->mc_list != NULL); - WARN_ON(timer_pending(&idev->rs_timer)); - -#ifdef NET_REFCNT_DEBUG - pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); -#endif - dev_put(dev); - if (!idev->dead) { - pr_warn("Freeing alive inet6 device %p\n", idev); - return; - } - snmp6_free_dev(idev); - kfree_rcu(idev, rcu); -} -EXPORT_SYMBOL(in6_dev_finish_destroy); - static struct inet6_dev *ipv6_add_dev(struct net_device *dev) { struct inet6_dev *ndev; @@ -3097,6 +3060,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) static void addrconf_rs_timer(unsigned long data) { struct inet6_dev *idev = (struct inet6_dev *)data; + struct net_device *dev = idev->dev; struct in6_addr lladdr; write_lock(&idev->lock); @@ -3111,12 +3075,14 @@ static void addrconf_rs_timer(unsigned long data) goto out; if (idev->rs_probes++ < idev->cnf.rtr_solicits) { - if (!__ipv6_get_lladdr(idev, &lladdr, IFA_F_TENTATIVE)) - ndisc_send_rs(idev->dev, &lladdr, + write_unlock(&idev->lock); + if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) + ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters); else - goto out; + goto put; + write_lock(&idev->lock); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == idev->cnf.rtr_solicits) ? @@ -3132,6 +3098,7 @@ static void addrconf_rs_timer(unsigned long data) out: write_unlock(&idev->lock); +put: in6_dev_put(idev); } @@ -4188,6 +4155,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; + array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; } static inline size_t inet6_ifla6_size(void) @@ -5002,6 +4970,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec }, { + .procname = "suppress_frag_ndisc", + .data = &ipv6_devconf.suppress_frag_ndisc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { /* sentinel */ } }, diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index d2f87427244b..4c11cbcf8308 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -6,6 +6,7 @@ #include <linux/export.h> #include <net/ipv6.h> #include <net/addrconf.h> +#include <net/ip.h> #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) @@ -98,3 +99,52 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v) return atomic_notifier_call_chain(&inet6addr_chain, val, v); } EXPORT_SYMBOL(inet6addr_notifier_call_chain); + +const struct ipv6_stub *ipv6_stub __read_mostly; +EXPORT_SYMBOL_GPL(ipv6_stub); + +/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ +const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +EXPORT_SYMBOL(in6addr_loopback); +const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +EXPORT_SYMBOL(in6addr_any); +const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +EXPORT_SYMBOL(in6addr_linklocal_allnodes); +const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +EXPORT_SYMBOL(in6addr_linklocal_allrouters); +const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); +const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); +const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; +EXPORT_SYMBOL(in6addr_sitelocal_allrouters); + +static void snmp6_free_dev(struct inet6_dev *idev) +{ + kfree(idev->stats.icmpv6msgdev); + kfree(idev->stats.icmpv6dev); + snmp_mib_free((void __percpu **)idev->stats.ipv6); +} + +/* Nobody refers to this device, we may destroy it. */ + +void in6_dev_finish_destroy(struct inet6_dev *idev) +{ + struct net_device *dev = idev->dev; + + WARN_ON(!list_empty(&idev->addr_list)); + WARN_ON(idev->mc_list != NULL); + WARN_ON(timer_pending(&idev->rs_timer)); + +#ifdef NET_REFCNT_DEBUG + pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); +#endif + dev_put(dev); + if (!idev->dead) { + pr_warn("Freeing alive inet6 device %p\n", idev); + return; + } + snmp6_free_dev(idev); + kfree_rcu(idev, rcu); +} +EXPORT_SYMBOL(in6_dev_finish_destroy); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0d1a9b153fbb..136fe55c1a47 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -56,6 +56,7 @@ #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> +#include <net/ndisc.h> #ifdef CONFIG_IPV6_TUNNEL #include <net/ip6_tunnel.h> #endif @@ -810,6 +811,15 @@ static struct pernet_operations inet6_net_ops = { .exit = inet6_net_exit, }; +static const struct ipv6_stub ipv6_stub_impl = { + .ipv6_sock_mc_join = ipv6_sock_mc_join, + .ipv6_sock_mc_drop = ipv6_sock_mc_drop, + .ipv6_dst_lookup = ip6_dst_lookup, + .udpv6_encap_enable = udpv6_encap_enable, + .ndisc_send_na = ndisc_send_na, + .nd_tbl = &nd_tbl, +}; + static int __init inet6_init(void) { struct list_head *r; @@ -884,6 +894,9 @@ static int __init inet6_init(void) err = igmp6_init(); if (err) goto igmp_fail; + + ipv6_stub = &ipv6_stub_impl; + err = ipv6_netfilter_init(); if (err) goto netfilter_fail; @@ -1040,6 +1053,7 @@ static void __exit inet6_exit(void) raw6_proc_exit(); #endif ipv6_netfilter_fini(); + ipv6_stub = NULL; igmp6_cleanup(); ndisc_cleanup(); ip6_mr_cleanup(); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index a263b990ee11..d82de7228100 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -91,6 +91,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, unsigned int unfrag_ip6hlen; u8 *prevhdr; int offset = 0; + bool tunnel; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_UDP | @@ -106,6 +107,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out; + tunnel = skb->encapsulation; ipv6h = ipv6_hdr(skb); __skb_pull(skb, sizeof(*ipv6h)); segs = ERR_PTR(-EPROTONOSUPPORT); @@ -126,7 +128,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ipv6h = ipv6_hdr(skb); ipv6h->payload_len = htons(skb->len - skb->mac_len - sizeof(*ipv6h)); - if (proto == IPPROTO_UDP) { + if (!tunnel && proto == IPPROTO_UDP) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6e3ddf806ec2..dd08cfd8999e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,31 +56,6 @@ #include <net/checksum.h> #include <linux/mroute6.h> -int __ip6_local_out(struct sk_buff *skb) -{ - int len; - - len = skb->len - sizeof(struct ipv6hdr); - if (len > IPV6_MAXPLEN) - len = 0; - ipv6_hdr(skb)->payload_len = htons(len); - - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - skb_dst(skb)->dev, dst_output); -} - -int ip6_local_out(struct sk_buff *skb) -{ - int err; - - err = __ip6_local_out(skb); - if (likely(err == 1)) - err = dst_output(skb); - - return err; -} -EXPORT_SYMBOL_GPL(ip6_local_out); - static int ip6_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 04d31c2fbef1..22210650596f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -428,7 +428,6 @@ static void ndisc_send_skb(struct sk_buff *skb, type = icmp6h->icmp6_type; if (!dst) { - struct sock *sk = net->ipv6.ndisc_sk; struct flowi6 fl6; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); @@ -462,10 +461,10 @@ static void ndisc_send_skb(struct sk_buff *skb, rcu_read_unlock(); } -static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *solicited_addr, - bool router, bool solicited, bool override, bool inc_opt) +void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *solicited_addr, + bool router, bool solicited, bool override, bool inc_opt) { struct sk_buff *skb; struct in6_addr tmpaddr; @@ -663,9 +662,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) } ndisc_send_ns(dev, neigh, target, target, saddr); } else if ((probes -= neigh->parms->app_probes) < 0) { -#ifdef CONFIG_ARPD neigh_app_ns(neigh); -#endif } else { addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(dev, NULL, target, &mcaddr, saddr); @@ -1519,10 +1516,27 @@ static void pndisc_redo(struct sk_buff *skb) kfree_skb(skb); } +static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) +{ + struct inet6_dev *idev = __in6_dev_get(skb->dev); + + if (!idev) + return true; + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && + idev->cnf.suppress_frag_ndisc) { + net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n"); + return true; + } + return false; +} + int ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; + if (ndisc_suppress_frag_ndisc(skb)) + return 0; + if (skb_linearize(skb)) return 0; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index ab92a3673fbb..827f795209cf 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -5,6 +5,7 @@ #include <linux/export.h> #include <net/ipv6.h> #include <net/ip6_fib.h> +#include <net/addrconf.h> void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) { @@ -75,3 +76,50 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } EXPORT_SYMBOL(ip6_find_1stfragopt); + +#if IS_ENABLED(CONFIG_IPV6) +int ip6_dst_hoplimit(struct dst_entry *dst) +{ + int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); + if (hoplimit == 0) { + struct net_device *dev = dst->dev; + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) + hoplimit = idev->cnf.hop_limit; + else + hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; + rcu_read_unlock(); + } + return hoplimit; +} +EXPORT_SYMBOL(ip6_dst_hoplimit); +#endif + +int __ip6_local_out(struct sk_buff *skb) +{ + int len; + + len = skb->len - sizeof(struct ipv6hdr); + if (len > IPV6_MAXPLEN) + len = 0; + ipv6_hdr(skb)->payload_len = htons(len); + + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, + skb_dst(skb)->dev, dst_output); +} +EXPORT_SYMBOL_GPL(__ip6_local_out); + +int ip6_local_out(struct sk_buff *skb) +{ + int err; + + err = __ip6_local_out(skb); + if (likely(err == 1)) + err = dst_output(skb); + + return err; +} +EXPORT_SYMBOL_GPL(ip6_local_out); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 55236a84c748..b770085ae36d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1354,25 +1354,6 @@ out: return entries > rt_max_size; } -int ip6_dst_hoplimit(struct dst_entry *dst) -{ - int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); - if (hoplimit == 0) { - struct net_device *dev = dst->dev; - struct inet6_dev *idev; - - rcu_read_lock(); - idev = __in6_dev_get(dev); - if (idev) - hoplimit = idev->cnf.hop_limit; - else - hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; - rcu_read_unlock(); - } - return hoplimit; -} -EXPORT_SYMBOL(ip6_dst_hoplimit); - /* * */ diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 5d1b8d7ac993..60559511bd9c 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -21,26 +21,25 @@ static int udp6_ufo_send_check(struct sk_buff *skb) const struct ipv6hdr *ipv6h; struct udphdr *uh; - /* UDP Tunnel offload on ipv6 is not yet supported. */ - if (skb->encapsulation) - return -EINVAL; - if (!pskb_may_pull(skb, sizeof(*uh))) return -EINVAL; - ipv6h = ipv6_hdr(skb); - uh = udp_hdr(skb); + if (likely(!skb->encapsulation)) { + ipv6h = ipv6_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } - uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; return 0; } static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; @@ -75,47 +74,51 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, goto out; } - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - /* Check if there is enough headroom to insert fragment header. */ - tnl_hlen = skb_tnl_header_len(skb); - if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { - if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) - goto out; + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot + * do checksum of UDP packets sent as multiple IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + /* Check if there is enough headroom to insert fragment header. */ + tnl_hlen = skb_tnl_header_len(skb); + if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { + if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) + goto out; + } + + /* Find the unfragmentable header and shift it left by frag_hdr_sz + * bytes to insert fragment header. + */ + unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + nexthdr = *prevhdr; + *prevhdr = NEXTHDR_FRAGMENT; + unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + + unfrag_ip6hlen + tnl_hlen; + packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; + memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); + + SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; + skb->mac_header -= frag_hdr_sz; + skb->network_header -= frag_hdr_sz; + + fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); + fptr->nexthdr = nexthdr; + fptr->reserved = 0; + ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); + + /* Fragment the skb. ipv6 header and the remaining fields of the + * fragment header are updated in ipv6_gso_segment() + */ + segs = skb_segment(skb, features); } - /* Find the unfragmentable header and shift it left by frag_hdr_sz - * bytes to insert fragment header. - */ - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - nexthdr = *prevhdr; - *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + - unfrag_ip6hlen + tnl_hlen; - packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; - memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); - - SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; - skb->mac_header -= frag_hdr_sz; - skb->network_header -= frag_hdr_sz; - - fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); - fptr->nexthdr = nexthdr; - fptr->reserved = 0; - ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); - - /* Fragment the skb. ipv6 header and the remaining fields of the - * fragment header are updated in ipv6_gso_segment() - */ - segs = skb_segment(skb, features); - out: return segs; } diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 36848bd54a77..a0060245b4e1 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -123,7 +123,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true); + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1fdf9ab91c3f..2e8286b47c28 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -88,7 +88,7 @@ #include <linux/virtio_net.h> #include <linux/errqueue.h> #include <linux/net_tstamp.h> - +#include <linux/reciprocal_div.h> #ifdef CONFIG_INET #include <net/inet_common.h> #endif @@ -1135,7 +1135,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return (((u64)skb->rxhash) * num) >> 32; + return reciprocal_divide(skb->rxhash, num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1158,6 +1158,13 @@ static unsigned int fanout_demux_cpu(struct packet_fanout *f, return smp_processor_id() % num; } +static unsigned int fanout_demux_rnd(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + return reciprocal_divide(prandom_u32(), num); +} + static unsigned int fanout_demux_rollover(struct packet_fanout *f, struct sk_buff *skb, unsigned int idx, unsigned int skip, @@ -1215,6 +1222,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, case PACKET_FANOUT_CPU: idx = fanout_demux_cpu(f, skb, num); break; + case PACKET_FANOUT_RND: + idx = fanout_demux_rnd(f, skb, num); + break; case PACKET_FANOUT_ROLLOVER: idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); break; @@ -1284,6 +1294,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: + case PACKET_FANOUT_RND: break; default: return -EINVAL; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 235e01acac51..c03a32a0418e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -272,6 +272,20 @@ config NET_SCH_FQ_CODEL If unsure, say N. +config NET_SCH_FQ + tristate "Fair Queue" + help + Say Y here if you want to use the FQ packet scheduling algorithm. + + FQ does flow separation, and is able to respect pacing requirements + set by TCP stack into sk->sk_pacing_rate (for localy generated + traffic) + + To compile this driver as a module, choose M here: the module + will be called sch_fq. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 978cbf004e80..e5f9abe9a5db 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o +obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 51b968d3febb..2adda7fa2d39 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -200,6 +200,58 @@ int unregister_qdisc(struct Qdisc_ops *qops) } EXPORT_SYMBOL(unregister_qdisc); +/* Get default qdisc if not otherwise specified */ +void qdisc_get_default(char *name, size_t len) +{ + read_lock(&qdisc_mod_lock); + strlcpy(name, default_qdisc_ops->id, len); + read_unlock(&qdisc_mod_lock); +} + +static struct Qdisc_ops *qdisc_lookup_default(const char *name) +{ + struct Qdisc_ops *q = NULL; + + for (q = qdisc_base; q; q = q->next) { + if (!strcmp(name, q->id)) { + if (!try_module_get(q->owner)) + q = NULL; + break; + } + } + + return q; +} + +/* Set new default qdisc to use */ +int qdisc_set_default(const char *name) +{ + const struct Qdisc_ops *ops; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + write_lock(&qdisc_mod_lock); + ops = qdisc_lookup_default(name); + if (!ops) { + /* Not found, drop lock and try to load module */ + write_unlock(&qdisc_mod_lock); + request_module("sch_%s", name); + write_lock(&qdisc_mod_lock); + + ops = qdisc_lookup_default(name); + } + + if (ops) { + /* Set new default */ + module_put(default_qdisc_ops->owner); + default_qdisc_ops = ops; + } + write_unlock(&qdisc_mod_lock); + + return ops ? 0 : -ENOENT; +} + /* We know handle. Find qdisc among all qdisc's attached to device (root qdisc, all its children, children of children etc.) */ @@ -1854,6 +1906,7 @@ static int __init pktsched_init(void) return err; } + register_qdisc(&pfifo_fast_ops); register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); register_qdisc(&pfifo_head_drop_qdisc_ops); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c new file mode 100644 index 000000000000..32ad015ee8ce --- /dev/null +++ b/net/sched/sch_fq.c @@ -0,0 +1,793 @@ +/* + * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) + * + * Copyright (C) 2013 Eric Dumazet <edumazet@google.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Meant to be mostly used for localy generated traffic : + * Fast classification depends on skb->sk being set before reaching us. + * If not, (router workload), we use rxhash as fallback, with 32 bits wide hash. + * All packets belonging to a socket are considered as a 'flow'. + * + * Flows are dynamically allocated and stored in a hash table of RB trees + * They are also part of one Round Robin 'queues' (new or old flows) + * + * Burst avoidance (aka pacing) capability : + * + * Transport (eg TCP) can set in sk->sk_pacing_rate a rate, enqueue a + * bunch of packets, and this packet scheduler adds delay between + * packets to respect rate limitation. + * + * enqueue() : + * - lookup one RB tree (out of 1024 or more) to find the flow. + * If non existent flow, create it, add it to the tree. + * Add skb to the per flow list of skb (fifo). + * - Use a special fifo for high prio packets + * + * dequeue() : serves flows in Round Robin + * Note : When a flow becomes empty, we do not immediately remove it from + * rb trees, for performance reasons (its expected to send additional packets, + * or SLAB cache will reuse socket for another flow) + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/jiffies.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/rbtree.h> +#include <linux/hash.h> +#include <linux/prefetch.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <net/sock.h> +#include <net/tcp_states.h> + +/* + * Per flow structure, dynamically allocated + */ +struct fq_flow { + struct sk_buff *head; /* list of skbs for this flow : first skb */ + union { + struct sk_buff *tail; /* last skb in the list */ + unsigned long age; /* jiffies when flow was emptied, for gc */ + }; + struct rb_node fq_node; /* anchor in fq_root[] trees */ + struct sock *sk; + int qlen; /* number of packets in flow queue */ + int credit; + u32 socket_hash; /* sk_hash */ + struct fq_flow *next; /* next pointer in RR lists, or &detached */ + + struct rb_node rate_node; /* anchor in q->delayed tree */ + u64 time_next_packet; +}; + +struct fq_flow_head { + struct fq_flow *first; + struct fq_flow *last; +}; + +struct fq_sched_data { + struct fq_flow_head new_flows; + + struct fq_flow_head old_flows; + + struct rb_root delayed; /* for rate limited flows */ + u64 time_next_delayed_flow; + + struct fq_flow internal; /* for non classified or high prio packets */ + u32 quantum; + u32 initial_quantum; + u32 flow_default_rate;/* rate per flow : bytes per second */ + u32 flow_max_rate; /* optional max rate per flow */ + u32 flow_plimit; /* max packets per flow */ + struct rb_root *fq_root; + u8 rate_enable; + u8 fq_trees_log; + + u32 flows; + u32 inactive_flows; + u32 throttled_flows; + + u64 stat_gc_flows; + u64 stat_internal_packets; + u64 stat_tcp_retrans; + u64 stat_throttled; + u64 stat_flows_plimit; + u64 stat_pkts_too_long; + u64 stat_allocation_errors; + struct qdisc_watchdog watchdog; +}; + +/* special value to mark a detached flow (not on old/new list) */ +static struct fq_flow detached, throttled; + +static void fq_flow_set_detached(struct fq_flow *f) +{ + f->next = &detached; +} + +static bool fq_flow_is_detached(const struct fq_flow *f) +{ + return f->next == &detached; +} + +static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) +{ + struct rb_node **p = &q->delayed.rb_node, *parent = NULL; + + while (*p) { + struct fq_flow *aux; + + parent = *p; + aux = container_of(parent, struct fq_flow, rate_node); + if (f->time_next_packet >= aux->time_next_packet) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&f->rate_node, parent, p); + rb_insert_color(&f->rate_node, &q->delayed); + q->throttled_flows++; + q->stat_throttled++; + + f->next = &throttled; + if (q->time_next_delayed_flow > f->time_next_packet) + q->time_next_delayed_flow = f->time_next_packet; +} + + +static struct kmem_cache *fq_flow_cachep __read_mostly; + +static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow) +{ + if (head->first) + head->last->next = flow; + else + head->first = flow; + head->last = flow; + flow->next = NULL; +} + +/* limit number of collected flows per round */ +#define FQ_GC_MAX 8 +#define FQ_GC_AGE (3*HZ) + +static bool fq_gc_candidate(const struct fq_flow *f) +{ + return fq_flow_is_detached(f) && + time_after(jiffies, f->age + FQ_GC_AGE); +} + +static void fq_gc(struct fq_sched_data *q, + struct rb_root *root, + struct sock *sk) +{ + struct fq_flow *f, *tofree[FQ_GC_MAX]; + struct rb_node **p, *parent; + int fcnt = 0; + + p = &root->rb_node; + parent = NULL; + while (*p) { + parent = *p; + + f = container_of(parent, struct fq_flow, fq_node); + if (f->sk == sk) + break; + + if (fq_gc_candidate(f)) { + tofree[fcnt++] = f; + if (fcnt == FQ_GC_MAX) + break; + } + + if (f->sk > sk) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + + q->flows -= fcnt; + q->inactive_flows -= fcnt; + q->stat_gc_flows += fcnt; + while (fcnt) { + struct fq_flow *f = tofree[--fcnt]; + + rb_erase(&f->fq_node, root); + kmem_cache_free(fq_flow_cachep, f); + } +} + +static const u8 prio2band[TC_PRIO_MAX + 1] = { + 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) +{ + struct rb_node **p, *parent; + struct sock *sk = skb->sk; + struct rb_root *root; + struct fq_flow *f; + int band; + + /* warning: no starvation prevention... */ + band = prio2band[skb->priority & TC_PRIO_MAX]; + if (unlikely(band == 0)) + return &q->internal; + + if (unlikely(!sk)) { + /* By forcing low order bit to 1, we make sure to not + * collide with a local flow (socket pointers are word aligned) + */ + sk = (struct sock *)(skb_get_rxhash(skb) | 1L); + } + + root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; + + if (q->flows >= (2U << q->fq_trees_log) && + q->inactive_flows > q->flows/2) + fq_gc(q, root, sk); + + p = &root->rb_node; + parent = NULL; + while (*p) { + parent = *p; + + f = container_of(parent, struct fq_flow, fq_node); + if (f->sk == sk) { + /* socket might have been reallocated, so check + * if its sk_hash is the same. + * It not, we need to refill credit with + * initial quantum + */ + if (unlikely(skb->sk && + f->socket_hash != sk->sk_hash)) { + f->credit = q->initial_quantum; + f->socket_hash = sk->sk_hash; + } + return f; + } + if (f->sk > sk) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + + f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!f)) { + q->stat_allocation_errors++; + return &q->internal; + } + fq_flow_set_detached(f); + f->sk = sk; + if (skb->sk) + f->socket_hash = sk->sk_hash; + f->credit = q->initial_quantum; + + rb_link_node(&f->fq_node, parent, p); + rb_insert_color(&f->fq_node, root); + + q->flows++; + q->inactive_flows++; + return f; +} + + +/* remove one skb from head of flow queue */ +static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) +{ + struct sk_buff *skb = flow->head; + + if (skb) { + flow->head = skb->next; + skb->next = NULL; + flow->qlen--; + } + return skb; +} + +/* We might add in the future detection of retransmits + * For the time being, just return false + */ +static bool skb_is_retransmit(struct sk_buff *skb) +{ + return false; +} + +/* add skb to flow queue + * flow queue is a linked list, kind of FIFO, except for TCP retransmits + * We special case tcp retransmits to be transmitted before other packets. + * We rely on fact that TCP retransmits are unlikely, so we do not waste + * a separate queue or a pointer. + * head-> [retrans pkt 1] + * [retrans pkt 2] + * [ normal pkt 1] + * [ normal pkt 2] + * [ normal pkt 3] + * tail-> [ normal pkt 4] + */ +static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) +{ + struct sk_buff *prev, *head = flow->head; + + skb->next = NULL; + if (!head) { + flow->head = skb; + flow->tail = skb; + return; + } + if (likely(!skb_is_retransmit(skb))) { + flow->tail->next = skb; + flow->tail = skb; + return; + } + + /* This skb is a tcp retransmit, + * find the last retrans packet in the queue + */ + prev = NULL; + while (skb_is_retransmit(head)) { + prev = head; + head = head->next; + if (!head) + break; + } + if (!prev) { /* no rtx packet in queue, become the new head */ + skb->next = flow->head; + flow->head = skb; + } else { + if (prev == flow->tail) + flow->tail = skb; + else + skb->next = prev->next; + prev->next = skb; + } +} + +static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct fq_sched_data *q = qdisc_priv(sch); + struct fq_flow *f; + + if (unlikely(sch->q.qlen >= sch->limit)) + return qdisc_drop(skb, sch); + + f = fq_classify(skb, q); + if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) { + q->stat_flows_plimit++; + return qdisc_drop(skb, sch); + } + + f->qlen++; + flow_queue_add(f, skb); + if (skb_is_retransmit(skb)) + q->stat_tcp_retrans++; + sch->qstats.backlog += qdisc_pkt_len(skb); + if (fq_flow_is_detached(f)) { + fq_flow_add_tail(&q->new_flows, f); + if (q->quantum > f->credit) + f->credit = q->quantum; + q->inactive_flows--; + qdisc_unthrottled(sch); + } + if (unlikely(f == &q->internal)) { + q->stat_internal_packets++; + qdisc_unthrottled(sch); + } + sch->q.qlen++; + + return NET_XMIT_SUCCESS; +} + +static void fq_check_throttled(struct fq_sched_data *q, u64 now) +{ + struct rb_node *p; + + if (q->time_next_delayed_flow > now) + return; + + q->time_next_delayed_flow = ~0ULL; + while ((p = rb_first(&q->delayed)) != NULL) { + struct fq_flow *f = container_of(p, struct fq_flow, rate_node); + + if (f->time_next_packet > now) { + q->time_next_delayed_flow = f->time_next_packet; + break; + } + rb_erase(p, &q->delayed); + q->throttled_flows--; + fq_flow_add_tail(&q->old_flows, f); + } +} + +static struct sk_buff *fq_dequeue(struct Qdisc *sch) +{ + struct fq_sched_data *q = qdisc_priv(sch); + u64 now = ktime_to_ns(ktime_get()); + struct fq_flow_head *head; + struct sk_buff *skb; + struct fq_flow *f; + + skb = fq_dequeue_head(&q->internal); + if (skb) + goto out; + fq_check_throttled(q, now); +begin: + head = &q->new_flows; + if (!head->first) { + head = &q->old_flows; + if (!head->first) { + if (q->time_next_delayed_flow != ~0ULL) + qdisc_watchdog_schedule_ns(&q->watchdog, + q->time_next_delayed_flow); + return NULL; + } + } + f = head->first; + + if (f->credit <= 0) { + f->credit += q->quantum; + head->first = f->next; + fq_flow_add_tail(&q->old_flows, f); + goto begin; + } + + if (unlikely(f->head && now < f->time_next_packet)) { + head->first = f->next; + fq_flow_set_throttled(q, f); + goto begin; + } + + skb = fq_dequeue_head(f); + if (!skb) { + head->first = f->next; + /* force a pass through old_flows to prevent starvation */ + if ((head == &q->new_flows) && q->old_flows.first) { + fq_flow_add_tail(&q->old_flows, f); + } else { + fq_flow_set_detached(f); + f->age = jiffies; + q->inactive_flows++; + } + goto begin; + } + prefetch(&skb->end); + f->time_next_packet = now; + f->credit -= qdisc_pkt_len(skb); + + if (f->credit <= 0 && + q->rate_enable && + skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { + u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; + + rate = min(rate, q->flow_max_rate); + if (rate) { + u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC; + + do_div(len, rate); + /* Since socket rate can change later, + * clamp the delay to 125 ms. + * TODO: maybe segment the too big skb, as in commit + * e43ac79a4bc ("sch_tbf: segment too big GSO packets") + */ + if (unlikely(len > 125 * NSEC_PER_MSEC)) { + len = 125 * NSEC_PER_MSEC; + q->stat_pkts_too_long++; + } + + f->time_next_packet = now + len; + } + } +out: + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + qdisc_unthrottled(sch); + return skb; +} + +static void fq_reset(struct Qdisc *sch) +{ + struct sk_buff *skb; + + while ((skb = fq_dequeue(sch)) != NULL) + kfree_skb(skb); +} + +static void fq_rehash(struct fq_sched_data *q, + struct rb_root *old_array, u32 old_log, + struct rb_root *new_array, u32 new_log) +{ + struct rb_node *op, **np, *parent; + struct rb_root *oroot, *nroot; + struct fq_flow *of, *nf; + int fcnt = 0; + u32 idx; + + for (idx = 0; idx < (1U << old_log); idx++) { + oroot = &old_array[idx]; + while ((op = rb_first(oroot)) != NULL) { + rb_erase(op, oroot); + of = container_of(op, struct fq_flow, fq_node); + if (fq_gc_candidate(of)) { + fcnt++; + kmem_cache_free(fq_flow_cachep, of); + continue; + } + nroot = &new_array[hash_32((u32)(long)of->sk, new_log)]; + + np = &nroot->rb_node; + parent = NULL; + while (*np) { + parent = *np; + + nf = container_of(parent, struct fq_flow, fq_node); + BUG_ON(nf->sk == of->sk); + + if (nf->sk > of->sk) + np = &parent->rb_right; + else + np = &parent->rb_left; + } + + rb_link_node(&of->fq_node, parent, np); + rb_insert_color(&of->fq_node, nroot); + } + } + q->flows -= fcnt; + q->inactive_flows -= fcnt; + q->stat_gc_flows += fcnt; +} + +static int fq_resize(struct fq_sched_data *q, u32 log) +{ + struct rb_root *array; + u32 idx; + + if (q->fq_root && log == q->fq_trees_log) + return 0; + + array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL); + if (!array) + return -ENOMEM; + + for (idx = 0; idx < (1U << log); idx++) + array[idx] = RB_ROOT; + + if (q->fq_root) { + fq_rehash(q, q->fq_root, q->fq_trees_log, array, log); + kfree(q->fq_root); + } + q->fq_root = array; + q->fq_trees_log = log; + + return 0; +} + +static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { + [TCA_FQ_PLIMIT] = { .type = NLA_U32 }, + [TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 }, + [TCA_FQ_QUANTUM] = { .type = NLA_U32 }, + [TCA_FQ_INITIAL_QUANTUM] = { .type = NLA_U32 }, + [TCA_FQ_RATE_ENABLE] = { .type = NLA_U32 }, + [TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 }, + [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, + [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, +}; + +static int fq_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct fq_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_FQ_MAX + 1]; + int err, drop_count = 0; + u32 fq_log; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy); + if (err < 0) + return err; + + sch_tree_lock(sch); + + fq_log = q->fq_trees_log; + + if (tb[TCA_FQ_BUCKETS_LOG]) { + u32 nval = nla_get_u32(tb[TCA_FQ_BUCKETS_LOG]); + + if (nval >= 1 && nval <= ilog2(256*1024)) + fq_log = nval; + else + err = -EINVAL; + } + if (tb[TCA_FQ_PLIMIT]) + sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]); + + if (tb[TCA_FQ_FLOW_PLIMIT]) + q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]); + + if (tb[TCA_FQ_QUANTUM]) + q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); + + if (tb[TCA_FQ_INITIAL_QUANTUM]) + q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + + if (tb[TCA_FQ_FLOW_DEFAULT_RATE]) + q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); + + if (tb[TCA_FQ_FLOW_MAX_RATE]) + q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + + if (tb[TCA_FQ_RATE_ENABLE]) { + u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]); + + if (enable <= 1) + q->rate_enable = enable; + else + err = -EINVAL; + } + + if (!err) + err = fq_resize(q, fq_log); + + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = fq_dequeue(sch); + + kfree_skb(skb); + drop_count++; + } + qdisc_tree_decrease_qlen(sch, drop_count); + + sch_tree_unlock(sch); + return err; +} + +static void fq_destroy(struct Qdisc *sch) +{ + struct fq_sched_data *q = qdisc_priv(sch); + struct rb_root *root; + struct rb_node *p; + unsigned int idx; + + if (q->fq_root) { + for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { + root = &q->fq_root[idx]; + while ((p = rb_first(root)) != NULL) { + rb_erase(p, root); + kmem_cache_free(fq_flow_cachep, + container_of(p, struct fq_flow, fq_node)); + } + } + kfree(q->fq_root); + } + qdisc_watchdog_cancel(&q->watchdog); +} + +static int fq_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct fq_sched_data *q = qdisc_priv(sch); + int err; + + sch->limit = 10000; + q->flow_plimit = 100; + q->quantum = 2 * psched_mtu(qdisc_dev(sch)); + q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); + q->flow_default_rate = 0; + q->flow_max_rate = ~0U; + q->rate_enable = 1; + q->new_flows.first = NULL; + q->old_flows.first = NULL; + q->delayed = RB_ROOT; + q->fq_root = NULL; + q->fq_trees_log = ilog2(1024); + qdisc_watchdog_init(&q->watchdog, sch); + + if (opt) + err = fq_change(sch, opt); + else + err = fq_resize(q, q->fq_trees_log); + + return err; +} + +static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct fq_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || + nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || + nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || + nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || + nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || + nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) || + nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || + nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) + goto nla_put_failure; + + nla_nest_end(skb, opts); + return skb->len; + +nla_put_failure: + return -1; +} + +static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct fq_sched_data *q = qdisc_priv(sch); + u64 now = ktime_to_ns(ktime_get()); + struct tc_fq_qd_stats st = { + .gc_flows = q->stat_gc_flows, + .highprio_packets = q->stat_internal_packets, + .tcp_retrans = q->stat_tcp_retrans, + .throttled = q->stat_throttled, + .flows_plimit = q->stat_flows_plimit, + .pkts_too_long = q->stat_pkts_too_long, + .allocation_errors = q->stat_allocation_errors, + .flows = q->flows, + .inactive_flows = q->inactive_flows, + .throttled_flows = q->throttled_flows, + .time_next_delayed_flow = q->time_next_delayed_flow - now, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static struct Qdisc_ops fq_qdisc_ops __read_mostly = { + .id = "fq", + .priv_size = sizeof(struct fq_sched_data), + + .enqueue = fq_enqueue, + .dequeue = fq_dequeue, + .peek = qdisc_peek_dequeued, + .init = fq_init, + .reset = fq_reset, + .destroy = fq_destroy, + .change = fq_change, + .dump = fq_dump, + .dump_stats = fq_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init fq_module_init(void) +{ + int ret; + + fq_flow_cachep = kmem_cache_create("fq_flow_cache", + sizeof(struct fq_flow), + 0, 0, NULL); + if (!fq_flow_cachep) + return -ENOMEM; + + ret = register_qdisc(&fq_qdisc_ops); + if (ret) + kmem_cache_destroy(fq_flow_cachep); + return ret; +} + +static void __exit fq_module_exit(void) +{ + unregister_qdisc(&fq_qdisc_ops); + kmem_cache_destroy(fq_flow_cachep); +} + +module_init(fq_module_init) +module_exit(fq_module_exit) +MODULE_AUTHOR("Eric Dumazet"); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 48be3d5c0d92..a74e278654aa 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -30,6 +30,10 @@ #include <net/pkt_sched.h> #include <net/dst.h> +/* Qdisc to use by default */ +const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; +EXPORT_SYMBOL(default_qdisc_ops); + /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with @@ -530,12 +534,11 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { .dump = pfifo_fast_dump, .owner = THIS_MODULE, }; -EXPORT_SYMBOL(pfifo_fast_ops); static struct lock_class_key qdisc_tx_busylock; struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, - struct Qdisc_ops *ops) + const struct Qdisc_ops *ops) { void *p; struct Qdisc *sch; @@ -579,10 +582,14 @@ errout: } struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, - struct Qdisc_ops *ops, unsigned int parentid) + const struct Qdisc_ops *ops, + unsigned int parentid) { struct Qdisc *sch; + if (!try_module_get(ops->owner)) + goto errout; + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; @@ -686,7 +693,7 @@ static void attach_one_default_qdisc(struct net_device *dev, if (dev->tx_queue_len) { qdisc = qdisc_create_dflt(dev_queue, - &pfifo_fast_ops, TC_H_ROOT); + default_qdisc_ops, TC_H_ROOT); if (!qdisc) { netdev_info(dev, "activation failed\n"); return; @@ -739,9 +746,8 @@ void dev_activate(struct net_device *dev) int need_watchdog; /* No queueing discipline is attached to device; - create default one i.e. pfifo_fast for devices, - which need queueing and noqueue_qdisc for - virtual interfaces + * create default one for devices, which need queueing + * and noqueue_qdisc for virtual interfaces */ if (dev->qdisc == &noop_qdisc) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 5da78a19ac9a..2e56185736d6 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -57,7 +57,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { dev_queue = netdev_get_tx_queue(dev, ntx); - qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, + qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1))); if (qdisc == NULL) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index accec33c454c..d44c868cb537 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -124,7 +124,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < dev->num_tx_queues; i++) { dev_queue = netdev_get_tx_queue(dev, i); - qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, + qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(i + 1))); if (qdisc == NULL) { diff --git a/net/sctp/probe.c b/net/sctp/probe.c index cd72ae57aff1..53c452efb40b 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -46,6 +46,10 @@ static int port __read_mostly = 0; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); +static unsigned int fwmark __read_mostly = 0; +MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); +module_param(fwmark, uint, 0); + static int bufsize __read_mostly = 64 * 1024; MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); module_param(bufsize, int, 0); @@ -129,15 +133,19 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net, void *arg, sctp_cmd_seq_t *commands) { + struct sctp_chunk *chunk = arg; + struct sk_buff *skb = chunk->skb; struct sctp_transport *sp; static __u32 lcwnd = 0; struct timespec now; sp = asoc->peer.primary_path; - if ((full || sp->cwnd != lcwnd) && - (!port || asoc->peer.port == port || - ep->base.bind_addr.port == port)) { + if (((port == 0 && fwmark == 0) || + asoc->peer.port == port || + ep->base.bind_addr.port == port || + (fwmark > 0 && skb->mark == fwmark)) && + (full || sp->cwnd != lcwnd)) { lcwnd = sp->cwnd; getnstimeofday(&now); @@ -198,8 +206,8 @@ static __init int sctpprobe_init(void) if (ret) goto remove_proc; - pr_info("probe registered (port=%d)\n", port); - + pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n", + port, fwmark, bufsize); return 0; remove_proc: diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 01e97836ca6c..d244a23ab8d3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2240,25 +2240,23 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, struct sctp_chunk **errp) { union sctp_params param; - int has_cookie = 0; + bool has_cookie = false; int result; - /* Verify stream values are non-zero. */ - if ((0 == peer_init->init_hdr.num_outbound_streams) || - (0 == peer_init->init_hdr.num_inbound_streams) || - (0 == peer_init->init_hdr.init_tag) || - (SCTP_DEFAULT_MINWINDOW > ntohl(peer_init->init_hdr.a_rwnd))) { - + /* Check for missing mandatory parameters. Note: Initial TSN is + * also mandatory, but is not checked here since the valid range + * is 0..2**32-1. RFC4960, section 3.3.3. + */ + if (peer_init->init_hdr.num_outbound_streams == 0 || + peer_init->init_hdr.num_inbound_streams == 0 || + peer_init->init_hdr.init_tag == 0 || + ntohl(peer_init->init_hdr.a_rwnd) < SCTP_DEFAULT_MINWINDOW) return sctp_process_inv_mandatory(asoc, chunk, errp); - } - /* Check for missing mandatory parameters. */ sctp_walk_params(param, peer_init, init_hdr.params) { - - if (SCTP_PARAM_STATE_COOKIE == param.p->type) - has_cookie = 1; - - } /* for (loop through all parameters) */ + if (param.p->type == SCTP_PARAM_STATE_COOKIE) + has_cookie = true; + } /* There is a possibility that a parameter length was bad and * in that case we would have stoped walking the parameters. |