diff options
Diffstat (limited to 'drivers/net/ethernet/google/gve/gve_main.c')
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c | 316 |
1 files changed, 268 insertions, 48 deletions
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 7302498c6df3..867e87af3432 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2019 Google, Inc. + * Copyright (C) 2015-2021 Google, Inc. */ #include <linux/cpumask.h> @@ -14,6 +14,7 @@ #include <linux/workqueue.h> #include <net/sch_generic.h> #include "gve.h" +#include "gve_dqo.h" #include "gve_adminq.h" #include "gve_register.h" @@ -26,6 +27,16 @@ const char gve_version_str[] = GVE_VERSION; static const char gve_version_prefix[] = GVE_VERSION_PREFIX; +static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (gve_is_gqi(priv)) + return gve_tx(skb, dev); + else + return gve_tx_dqo(skb, dev); +} + static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) { struct gve_priv *priv = netdev_priv(dev); @@ -155,6 +166,15 @@ static irqreturn_t gve_intr(int irq, void *arg) return IRQ_HANDLED; } +static irqreturn_t gve_intr_dqo(int irq, void *arg) +{ + struct gve_notify_block *block = arg; + + /* Interrupts are automatically masked */ + napi_schedule_irqoff(&block->napi); + return IRQ_HANDLED; +} + static int gve_napi_poll(struct napi_struct *napi, int budget) { struct gve_notify_block *block; @@ -180,7 +200,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) /* Double check we have no extra work. * Ensure unmask synchronizes with checking for work. 
*/ - dma_rmb(); + mb(); if (block->tx) reschedule |= gve_tx_poll(block, -1); if (block->rx) @@ -191,6 +211,54 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) return 0; } +static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) +{ + struct gve_notify_block *block = + container_of(napi, struct gve_notify_block, napi); + struct gve_priv *priv = block->priv; + bool reschedule = false; + int work_done = 0; + + /* Clear PCI MSI-X Pending Bit Array (PBA) + * + * This bit is set if an interrupt event occurs while the vector is + * masked. If this bit is set and we reenable the interrupt, it will + * fire again. Since we're just about to poll the queue state, we don't + * need it to fire again. + * + * Under high softirq load, it's possible that the interrupt condition + * is triggered twice before we got the chance to process it. + */ + gve_write_irq_doorbell_dqo(priv, block, + GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO); + + if (block->tx) + reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + + if (block->rx) { + work_done = gve_rx_poll_dqo(block, budget); + reschedule |= work_done == budget; + } + + if (reschedule) + return budget; + + if (likely(napi_complete_done(napi, work_done))) { + /* Enable interrupts again. + * + * We don't need to repoll afterwards because HW supports the + * PCI MSI-X PBA feature. + * + * Another interrupt would be triggered if a new event came in + * since the last one. 
+ */ + gve_write_irq_doorbell_dqo(priv, block, + GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); + } + + return work_done; +} + static int gve_alloc_notify_blocks(struct gve_priv *priv) { int num_vecs_requested = priv->num_ntfy_blks + 1; @@ -220,6 +288,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) int vecs_left = new_num_ntfy_blks % 2; priv->num_ntfy_blks = new_num_ntfy_blks; + priv->mgmt_msix_idx = priv->num_ntfy_blks; priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, vecs_per_type); priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, @@ -263,7 +332,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) name, i); block->priv = priv; err = request_irq(priv->msix_vectors[msix_idx].vector, - gve_intr, 0, block->name, block); + gve_is_gqi(priv) ? gve_intr : gve_intr_dqo, + 0, block->name, block); if (err) { dev_err(&priv->pdev->dev, "Failed to receive msix vector %d\n", i); @@ -300,20 +370,22 @@ static void gve_free_notify_blocks(struct gve_priv *priv) { int i; - /* Free the irqs */ - for (i = 0; i < priv->num_ntfy_blks; i++) { - struct gve_notify_block *block = &priv->ntfy_blocks[i]; - int msix_idx = i; + if (priv->msix_vectors) { + /* Free the irqs */ + for (i = 0; i < priv->num_ntfy_blks; i++) { + struct gve_notify_block *block = &priv->ntfy_blocks[i]; + int msix_idx = i; - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - NULL); - free_irq(priv->msix_vectors[msix_idx].vector, block); + irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, + NULL); + free_irq(priv->msix_vectors[msix_idx].vector, block); + } + free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); } dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks), priv->ntfy_blocks, priv->ntfy_block_bus); priv->ntfy_blocks = NULL; - free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); pci_disable_msix(priv->pdev); kvfree(priv->msix_vectors); priv->msix_vectors = NULL; @@ -343,6 +415,22 @@ static 
int gve_setup_device_resources(struct gve_priv *priv) err = -ENXIO; goto abort_with_stats_report; } + + if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), + GFP_KERNEL); + if (!priv->ptype_lut_dqo) { + err = -ENOMEM; + goto abort_with_stats_report; + } + err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); + if (err) { + dev_err(&priv->pdev->dev, + "Failed to get ptype map: err=%d\n", err); + goto abort_with_ptype_lut; + } + } + err = gve_adminq_report_stats(priv, priv->stats_report_len, priv->stats_report_bus, GVE_STATS_REPORT_TIMER_PERIOD); @@ -351,12 +439,17 @@ static int gve_setup_device_resources(struct gve_priv *priv) "Failed to report stats: err=%d\n", err); gve_set_device_resources_ok(priv); return 0; + +abort_with_ptype_lut: + kvfree(priv->ptype_lut_dqo); + priv->ptype_lut_dqo = NULL; abort_with_stats_report: gve_free_stats_report(priv); abort_with_ntfy_blocks: gve_free_notify_blocks(priv); abort_with_counter: gve_free_counter_array(priv); + return err; } @@ -383,17 +476,22 @@ static void gve_teardown_device_resources(struct gve_priv *priv) gve_trigger_reset(priv); } } + + kvfree(priv->ptype_lut_dqo); + priv->ptype_lut_dqo = NULL; + gve_free_counter_array(priv); gve_free_notify_blocks(priv); gve_free_stats_report(priv); gve_clear_device_resources_ok(priv); } -static void gve_add_napi(struct gve_priv *priv, int ntfy_idx) +static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, + int (*gve_poll)(struct napi_struct *, int)) { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_add(priv->dev, &block->napi, gve_napi_poll, + netif_napi_add(priv->dev, &block->napi, gve_poll, NAPI_POLL_WEIGHT); } @@ -473,31 +571,75 @@ static int gve_create_rings(struct gve_priv *priv) netif_dbg(priv, drv, priv->dev, "created %d rx queues\n", priv->rx_cfg.num_queues); - /* Rx data ring has been prefilled with packet buffers at queue - * allocation time. 
- * Write the doorbell to provide descriptor slots and packet buffers - * to the NIC. - */ - for (i = 0; i < priv->rx_cfg.num_queues; i++) - gve_rx_write_doorbell(priv, &priv->rx[i]); + if (gve_is_gqi(priv)) { + /* Rx data ring has been prefilled with packet buffers at queue + * allocation time. + * + * Write the doorbell to provide descriptor slots and packet + * buffers to the NIC. + */ + for (i = 0; i < priv->rx_cfg.num_queues; i++) + gve_rx_write_doorbell(priv, &priv->rx[i]); + } else { + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + /* Post buffers and ring doorbell. */ + gve_rx_post_buffers_dqo(&priv->rx[i]); + } + } return 0; } +static void add_napi_init_sync_stats(struct gve_priv *priv, + int (*napi_poll)(struct napi_struct *napi, + int budget)) +{ + int i; + + /* Add tx napi & init sync stats*/ + for (i = 0; i < priv->tx_cfg.num_queues; i++) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); + + u64_stats_init(&priv->tx[i].statss); + priv->tx[i].ntfy_id = ntfy_idx; + gve_add_napi(priv, ntfy_idx, napi_poll); + } + /* Add rx napi & init sync stats*/ + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); + + u64_stats_init(&priv->rx[i].statss); + priv->rx[i].ntfy_id = ntfy_idx; + gve_add_napi(priv, ntfy_idx, napi_poll); + } +} + +static void gve_tx_free_rings(struct gve_priv *priv) +{ + if (gve_is_gqi(priv)) { + gve_tx_free_rings_gqi(priv); + } else { + gve_tx_free_rings_dqo(priv); + } +} + static int gve_alloc_rings(struct gve_priv *priv) { - int ntfy_idx; int err; - int i; /* Setup tx rings */ priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx), GFP_KERNEL); if (!priv->tx) return -ENOMEM; - err = gve_tx_alloc_rings(priv); + + if (gve_is_gqi(priv)) + err = gve_tx_alloc_rings(priv); + else + err = gve_tx_alloc_rings_dqo(priv); if (err) goto free_tx; + /* Setup rx rings */ priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx), GFP_KERNEL); @@ -505,21 +647,18 @@ static int gve_alloc_rings(struct 
gve_priv *priv) err = -ENOMEM; goto free_tx_queue; } - err = gve_rx_alloc_rings(priv); + + if (gve_is_gqi(priv)) + err = gve_rx_alloc_rings(priv); + else + err = gve_rx_alloc_rings_dqo(priv); if (err) goto free_rx; - /* Add tx napi & init sync stats*/ - for (i = 0; i < priv->tx_cfg.num_queues; i++) { - u64_stats_init(&priv->tx[i].statss); - ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - gve_add_napi(priv, ntfy_idx); - } - /* Add rx napi & init sync stats*/ - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - u64_stats_init(&priv->rx[i].statss); - ntfy_idx = gve_rx_idx_to_ntfy(priv, i); - gve_add_napi(priv, ntfy_idx); - } + + if (gve_is_gqi(priv)) + add_napi_init_sync_stats(priv, gve_napi_poll); + else + add_napi_init_sync_stats(priv, gve_napi_poll_dqo); return 0; @@ -557,6 +696,14 @@ static int gve_destroy_rings(struct gve_priv *priv) return 0; } +static void gve_rx_free_rings(struct gve_priv *priv) +{ + if (gve_is_gqi(priv)) + gve_rx_free_rings_gqi(priv); + else + gve_rx_free_rings_dqo(priv); +} + static void gve_free_rings(struct gve_priv *priv) { int ntfy_idx; @@ -678,7 +825,7 @@ static int gve_alloc_qpls(struct gve_priv *priv) int err; /* Raw addressing means no QPLs */ - if (priv->raw_addressing) + if (priv->queue_format == GVE_GQI_RDA_FORMAT) return 0; priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL); @@ -722,7 +869,7 @@ static void gve_free_qpls(struct gve_priv *priv) int i; /* Raw addressing means no QPLs */ - if (priv->raw_addressing) + if (priv->queue_format == GVE_GQI_RDA_FORMAT) return; kvfree(priv->qpl_cfg.qpl_id_map); @@ -756,6 +903,7 @@ static int gve_open(struct net_device *dev) err = gve_alloc_qpls(priv); if (err) return err; + err = gve_alloc_rings(priv); if (err) goto free_qpls; @@ -770,9 +918,17 @@ static int gve_open(struct net_device *dev) err = gve_register_qpls(priv); if (err) goto reset; + + if (!gve_is_gqi(priv)) { + /* Hard code this for now. This may be tuned in the future for + * performance. 
+ */ + priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO; + } err = gve_create_rings(priv); if (err) goto reset; + gve_set_device_rings_ok(priv); if (gve_get_report_stats(priv)) @@ -921,14 +1077,26 @@ static void gve_turnup(struct gve_priv *priv) struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; napi_enable(&block->napi); - iowrite32be(0, gve_irq_doorbell(priv, block)); + if (gve_is_gqi(priv)) { + iowrite32be(0, gve_irq_doorbell(priv, block)); + } else { + u32 val = gve_set_itr_ratelimit_dqo(GVE_TX_IRQ_RATELIMIT_US_DQO); + + gve_write_irq_doorbell_dqo(priv, block, val); + } } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; napi_enable(&block->napi); - iowrite32be(0, gve_irq_doorbell(priv, block)); + if (gve_is_gqi(priv)) { + iowrite32be(0, gve_irq_doorbell(priv, block)); + } else { + u32 val = gve_set_itr_ratelimit_dqo(GVE_RX_IRQ_RATELIMIT_US_DQO); + + gve_write_irq_doorbell_dqo(priv, block, val); + } } gve_set_napi_enabled(priv); @@ -942,12 +1110,49 @@ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) priv->tx_timeo_cnt++; } +static int gve_set_features(struct net_device *netdev, + netdev_features_t features) +{ + const netdev_features_t orig_features = netdev->features; + struct gve_priv *priv = netdev_priv(netdev); + int err; + + if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { + netdev->features ^= NETIF_F_LRO; + if (netif_carrier_ok(netdev)) { + /* To make this process as simple as possible we + * teardown the device, set the new configuration, + * and then bring the device up again. + */ + err = gve_close(netdev); + /* We have already tried to reset in close, just fail + * at this point. + */ + if (err) + goto err; + + err = gve_open(netdev); + if (err) + goto err; + } + } + + return 0; +err: + /* Reverts the change on error. 
*/ + netdev->features = orig_features; + netif_err(priv, drv, netdev, + "Set features failed! !!! DISABLING ALL QUEUES !!!\n"); + return err; +} + static const struct net_device_ops gve_netdev_ops = { - .ndo_start_xmit = gve_tx, + .ndo_start_xmit = gve_start_xmit, .ndo_open = gve_open, .ndo_stop = gve_close, .ndo_get_stats64 = gve_get_stats, .ndo_tx_timeout = gve_tx_timeout, + .ndo_set_features = gve_set_features, }; static void gve_handle_status(struct gve_priv *priv, u32 status) @@ -991,6 +1196,15 @@ void gve_handle_report_stats(struct gve_priv *priv) /* tx stats */ if (priv->tx) { for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + u32 last_completion = 0; + u32 tx_frames = 0; + + /* DQO doesn't currently support these metrics. */ + if (gve_is_gqi(priv)) { + last_completion = priv->tx[idx].done; + tx_frames = priv->tx[idx].req; + } + do { start = u64_stats_fetch_begin(&priv->tx[idx].statss); tx_bytes = priv->tx[idx].bytes_done; @@ -1007,7 +1221,7 @@ void gve_handle_report_stats(struct gve_priv *priv) }; stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(TX_FRAMES_SENT), - .value = cpu_to_be64(priv->tx[idx].req), + .value = cpu_to_be64(tx_frames), .queue_id = cpu_to_be32(idx), }; stats[stats_idx++] = (struct stats) { @@ -1017,7 +1231,7 @@ void gve_handle_report_stats(struct gve_priv *priv) }; stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED), - .value = cpu_to_be64(priv->tx[idx].done), + .value = cpu_to_be64(last_completion), .queue_id = cpu_to_be32(idx), }; } @@ -1085,7 +1299,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) if (skip_describe_device) goto setup_device; - priv->raw_addressing = false; + priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED; /* Get the initial information we need from the device */ err = gve_adminq_describe_device(priv); if (err) { @@ -1093,7 +1307,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) "Could not get 
device information: err=%d\n", err); goto err; } - if (priv->dev->max_mtu > PAGE_SIZE) { + if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) { priv->dev->max_mtu = PAGE_SIZE; err = gve_adminq_set_mtu(priv, priv->dev->mtu); if (err) { @@ -1292,8 +1506,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) gve_write_version(&reg_bar->driver_version); /* Get max queues to alloc etherdev */ - max_rx_queues = ioread32be(&reg_bar->max_tx_queues); - max_tx_queues = ioread32be(&reg_bar->max_rx_queues); + max_tx_queues = ioread32be(&reg_bar->max_tx_queues); + max_rx_queues = ioread32be(&reg_bar->max_rx_queues); /* Alloc and setup the netdev and priv */ dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); if (!dev) { @@ -1304,7 +1518,12 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); dev->ethtool_ops = &gve_ethtool_ops; dev->netdev_ops = &gve_netdev_ops; - /* advertise features */ + + /* Set default and supported features. + * + * Features might be set in other locations as well (such as + * `gve_adminq_describe_device`). + */ dev->hw_features = NETIF_F_HIGHDMA; dev->hw_features |= NETIF_F_SG; dev->hw_features |= NETIF_F_HW_CSUM; @@ -1349,6 +1568,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto abort_with_wq; dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); + dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); gve_clear_probe_in_progress(priv); queue_work(priv->gve_wq, &priv->service_task); return 0; |