Diffstat (limited to 'drivers/net/ethernet/cavium')
25 files changed, 1509 insertions, 722 deletions
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h index 2fedd91f3df8..dee604651ba7 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h @@ -43,6 +43,8 @@ struct octeon_cn23xx_pf { struct octeon_config *conf; }; +#define CN23XX_SLI_DEF_BP 0x40 + int setup_cn23xx_octeon_pf_device(struct octeon_device *oct); int validate_cn23xx_pf_config_info(struct octeon_device *oct, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index f629c2fe04a4..796c2cbc11f6 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -26,6 +26,9 @@ #include "octeon_main.h" #include "octeon_network.h" +/* OOM task polling interval */ +#define LIO_OOM_POLL_INTERVAL_MS 250 + int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) { struct lio *lio = GET_LIO(netdev); @@ -124,6 +127,17 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) struct octeon_device *oct = lio->oct_dev; u8 *mac; + if (nctrl->completion && nctrl->response_code) { + /* Signal whoever is interested that the response code from the + * firmware has arrived. + */ + WRITE_ONCE(*nctrl->response_code, nctrl->status); + complete(nctrl->completion); + } + + if (nctrl->status) + return; + switch (nctrl->ncmd.s.cmd) { case OCTNET_CMD_CHANGE_DEVFLAGS: case OCTNET_CMD_SET_MULTI_LIST: @@ -131,11 +145,20 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) case OCTNET_CMD_CHANGE_MACADDR: mac = ((u8 *)&nctrl->udd[0]) + 2; - netif_info(lio, probe, lio->netdev, - "MACAddr changed to %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", - mac[0], mac[1], - mac[2], mac[3], - mac[4], mac[5]); + if (nctrl->ncmd.s.param1) { + /* vfidx is 0 based, but vf_num (param1) is 1 based */ + int vfidx = nctrl->ncmd.s.param1 - 1; + bool mac_is_admin_assigned = nctrl->ncmd.s.param2; + + if (mac_is_admin_assigned) + netif_info(lio, probe, lio->netdev, + "MAC Address %pM is configured for VF %d\n", + mac, vfidx); + } else { + netif_info(lio, probe, lio->netdev, + " MACAddr changed to %pM\n", + mac); + } break; case OCTNET_CMD_CHANGE_MTU: @@ -284,3 +307,56 @@ void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac) * the PF did that already */ } + +static void octnet_poll_check_rxq_oom_status(struct work_struct *work) +{ + struct cavium_wk *wk = (struct cavium_wk *)work; + struct lio *lio = (struct lio *)wk->ctxptr; + struct octeon_device *oct = lio->oct_dev; + struct octeon_droq *droq; + int q, q_no = 0; + + if (ifstate_check(lio, LIO_IFSTATE_RUNNING)) { + for (q = 0; q < lio->linfo.num_rxpciq; q++) { + q_no = lio->linfo.rxpciq[q].s.q_no; + droq = oct->droq[q_no]; + if (!droq) + continue; + octeon_droq_check_oom(droq); + } + } + queue_delayed_work(lio->rxq_status_wq.wq, + &lio->rxq_status_wq.wk.work, + msecs_to_jiffies(LIO_OOM_POLL_INTERVAL_MS)); +} + +int setup_rx_oom_poll_fn(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + + lio->rxq_status_wq.wq = alloc_workqueue("rxq-oom-status", + WQ_MEM_RECLAIM, 0); + if (!lio->rxq_status_wq.wq) { + dev_err(&oct->pci_dev->dev, "unable to create cavium rxq oom status wq\n"); + return -ENOMEM; + } + INIT_DELAYED_WORK(&lio->rxq_status_wq.wk.work, + octnet_poll_check_rxq_oom_status); + lio->rxq_status_wq.wk.ctxptr = lio; + queue_delayed_work(lio->rxq_status_wq.wq, + 
&lio->rxq_status_wq.wk.work, + msecs_to_jiffies(LIO_OOM_POLL_INTERVAL_MS)); + return 0; +} + +void cleanup_rx_oom_poll_fn(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + + if (lio->rxq_status_wq.wq) { + cancel_delayed_work_sync(&lio->rxq_status_wq.wk.work); + flush_workqueue(lio->rxq_status_wq.wq); + destroy_workqueue(lio->rxq_status_wq.wq); + } +} diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 50384cede8be..579dc7336f58 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -33,6 +33,19 @@ static int octnet_get_link_stats(struct net_device *netdev); +struct oct_intrmod_context { + int octeon_id; + wait_queue_head_t wc; + int cond; + int status; +}; + +struct oct_intrmod_resp { + u64 rh; + struct oct_intrmod_cfg intrmod; + u64 status; +}; + struct oct_mdio_cmd_context { int octeon_id; wait_queue_head_t wc; @@ -213,17 +226,23 @@ static int lio_get_link_ksettings(struct net_device *netdev, struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; struct oct_link_info *linfo; - u32 supported, advertising; + u32 supported = 0, advertising = 0; linfo = &lio->linfo; if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI || linfo->link.s.if_mode == INTERFACE_MODE_RXAUI || + linfo->link.s.if_mode == INTERFACE_MODE_XLAUI || linfo->link.s.if_mode == INTERFACE_MODE_XFI) { ecmd->base.port = PORT_FIBRE; - supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE | - SUPPORTED_Pause); - advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Pause); + + if (linfo->link.s.speed == SPEED_10000) { + supported = SUPPORTED_10000baseT_Full; + advertising = ADVERTISED_10000baseT_Full; + } + + supported |= SUPPORTED_FIBRE | SUPPORTED_Pause; + advertising |= ADVERTISED_Pause; ethtool_convert_legacy_u32_to_link_mode( ecmd->link_modes.supported, supported); ethtool_convert_legacy_u32_to_link_mode( @@ -1292,95 +1311,101 @@ static int lio_vf_get_sset_count(struct net_device *netdev, int sset) } } -static int lio_get_intr_coalesce(struct net_device *netdev, - struct ethtool_coalesce *intr_coal) +/* Callback function for intrmod */ +static void octnet_intrmod_callback(struct octeon_device *oct_dev, + u32 status, + void *ptr) { - struct lio *lio = GET_LIO(netdev); - struct octeon_device *oct = lio->oct_dev; - struct octeon_instr_queue *iq; - struct oct_intrmod_cfg *intrmod_cfg; + struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; + struct oct_intrmod_context *ctx; - intrmod_cfg = &oct->intrmod; + ctx = (struct oct_intrmod_context *)sc->ctxptr; - switch (oct->chip_id) { - case OCTEON_CN23XX_PF_VID: - case OCTEON_CN23XX_VF_VID: - if (!intrmod_cfg->rx_enable) { - intr_coal->rx_coalesce_usecs = intrmod_cfg->rx_usecs; - intr_coal->rx_max_coalesced_frames = - intrmod_cfg->rx_frames; - } - if (!intrmod_cfg->tx_enable) - intr_coal->tx_max_coalesced_frames = - intrmod_cfg->tx_frames; - break; - case OCTEON_CN68XX: - case OCTEON_CN66XX: { - struct octeon_cn6xxx *cn6xxx = - (struct octeon_cn6xxx *)oct->chip; + ctx->status = status; - if (!intrmod_cfg->rx_enable) { - intr_coal->rx_coalesce_usecs = - CFG_GET_OQ_INTR_TIME(cn6xxx->conf); - intr_coal->rx_max_coalesced_frames = - CFG_GET_OQ_INTR_PKT(cn6xxx->conf); - } - iq = oct->instr_queue[lio->linfo.txpciq[0].s.q_no]; - intr_coal->tx_max_coalesced_frames = iq->fill_threshold; - break; - } - default: - netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n"); + WRITE_ONCE(ctx->cond, 1); + + /* 
This barrier is required to be sure that the response has been + * written fully before waking up the handler + */ + wmb(); + + wake_up_interruptible(&ctx->wc); +} + +/* get interrupt moderation parameters */ +static int octnet_get_intrmod_cfg(struct lio *lio, + struct oct_intrmod_cfg *intr_cfg) +{ + struct octeon_soft_command *sc; + struct oct_intrmod_context *ctx; + struct oct_intrmod_resp *resp; + int retval; + struct octeon_device *oct_dev = lio->oct_dev; + + /* Alloc soft command */ + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct_dev, + 0, + sizeof(struct oct_intrmod_resp), + sizeof(struct oct_intrmod_context)); + + if (!sc) + return -ENOMEM; + + resp = (struct oct_intrmod_resp *)sc->virtrptr; + memset(resp, 0, sizeof(struct oct_intrmod_resp)); + + ctx = (struct oct_intrmod_context *)sc->ctxptr; + memset(ctx, 0, sizeof(struct oct_intrmod_context)); + WRITE_ONCE(ctx->cond, 0); + ctx->octeon_id = lio_get_device_id(oct_dev); + init_waitqueue_head(&ctx->wc); + + sc->iq_no = lio->linfo.txpciq[0].s.q_no; + + octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC, + OPCODE_NIC_INTRMOD_PARAMS, 0, 0, 0); + + sc->callback = octnet_intrmod_callback; + sc->callback_arg = sc; + sc->wait_time = 1000; + + retval = octeon_send_soft_command(oct_dev, sc); + if (retval == IQ_SEND_FAILED) { + octeon_free_soft_command(oct_dev, sc); return -EINVAL; } - if (intrmod_cfg->rx_enable) { - intr_coal->use_adaptive_rx_coalesce = - intrmod_cfg->rx_enable; - intr_coal->rate_sample_interval = - intrmod_cfg->check_intrvl; - intr_coal->pkt_rate_high = - intrmod_cfg->maxpkt_ratethr; - intr_coal->pkt_rate_low = - intrmod_cfg->minpkt_ratethr; - intr_coal->rx_max_coalesced_frames_high = - intrmod_cfg->rx_maxcnt_trigger; - intr_coal->rx_coalesce_usecs_high = - intrmod_cfg->rx_maxtmr_trigger; - intr_coal->rx_coalesce_usecs_low = - intrmod_cfg->rx_mintmr_trigger; - intr_coal->rx_max_coalesced_frames_low = - intrmod_cfg->rx_mincnt_trigger; + + /* Sleep on a wait queue till the cond flag indicates that the + * response arrived or timed-out. + */ + if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) { + dev_err(&oct_dev->pci_dev->dev, "Wait interrupted\n"); + goto intrmod_info_wait_intr; } - if ((OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) && - (intrmod_cfg->tx_enable)) { - intr_coal->use_adaptive_tx_coalesce = intrmod_cfg->tx_enable; - intr_coal->tx_max_coalesced_frames_high = - intrmod_cfg->tx_maxcnt_trigger; - intr_coal->tx_max_coalesced_frames_low = - intrmod_cfg->tx_mincnt_trigger; + + retval = ctx->status || resp->status; + if (retval) { + dev_err(&oct_dev->pci_dev->dev, + "Get interrupt moderation parameters failed\n"); + goto intrmod_info_wait_fail; } - return 0; -} -/* Callback function for intrmod */ -static void octnet_intrmod_callback(struct octeon_device *oct_dev, - u32 status, - void *ptr) -{ - struct oct_intrmod_cmd *cmd = ptr; - struct octeon_soft_command *sc = cmd->sc; + octeon_swap_8B_data((u64 *)&resp->intrmod, + (sizeof(struct oct_intrmod_cfg)) / 8); + memcpy(intr_cfg, &resp->intrmod, sizeof(struct oct_intrmod_cfg)); + octeon_free_soft_command(oct_dev, sc); - oct_dev = cmd->oct_dev; + return 0; - if (status) - dev_err(&oct_dev->pci_dev->dev, "intrmod config failed. 
Status: %llx\n", - CVM_CAST64(status)); - else - dev_info(&oct_dev->pci_dev->dev, - "Rx-Adaptive Interrupt moderation enabled:%llx\n", - oct_dev->intrmod.rx_enable); +intrmod_info_wait_fail: octeon_free_soft_command(oct_dev, sc); + +intrmod_info_wait_intr: + + return -ENODEV; } /* Configure interrupt moderation parameters */ @@ -1388,7 +1413,7 @@ static int octnet_set_intrmod_cfg(struct lio *lio, struct oct_intrmod_cfg *intr_cfg) { struct octeon_soft_command *sc; - struct oct_intrmod_cmd *cmd; + struct oct_intrmod_context *ctx; struct oct_intrmod_cfg *cfg; int retval; struct octeon_device *oct_dev = lio->oct_dev; @@ -1398,19 +1423,21 @@ static int octnet_set_intrmod_cfg(struct lio *lio, octeon_alloc_soft_command(oct_dev, sizeof(struct oct_intrmod_cfg), 0, - sizeof(struct oct_intrmod_cmd)); + sizeof(struct oct_intrmod_context)); if (!sc) return -ENOMEM; - cmd = (struct oct_intrmod_cmd *)sc->ctxptr; + ctx = (struct oct_intrmod_context *)sc->ctxptr; + + WRITE_ONCE(ctx->cond, 0); + ctx->octeon_id = lio_get_device_id(oct_dev); + init_waitqueue_head(&ctx->wc); + cfg = (struct oct_intrmod_cfg *)sc->virtdptr; memcpy(cfg, intr_cfg, sizeof(struct oct_intrmod_cfg)); octeon_swap_8B_data((u64 *)cfg, (sizeof(struct oct_intrmod_cfg)) / 8); - cmd->sc = sc; - cmd->cfg = cfg; - cmd->oct_dev = oct_dev; sc->iq_no = lio->linfo.txpciq[0].s.q_no; @@ -1418,7 +1445,7 @@ static int octnet_set_intrmod_cfg(struct lio *lio, OPCODE_NIC_INTRMOD_CFG, 0, 0, 0); sc->callback = octnet_intrmod_callback; - sc->callback_arg = cmd; + sc->callback_arg = sc; sc->wait_time = 1000; retval = octeon_send_soft_command(oct_dev, sc); @@ -1427,7 +1454,29 @@ static int octnet_set_intrmod_cfg(struct lio *lio, return -EINVAL; } - return 0; + /* Sleep on a wait queue till the cond flag indicates that the + * response arrived or timed-out. + */ + if (sleep_cond(&ctx->wc, &ctx->cond) != -EINTR) { + retval = ctx->status; + if (retval) + dev_err(&oct_dev->pci_dev->dev, + "intrmod config failed. Status: %llx\n", + CVM_CAST64(retval)); + else + dev_info(&oct_dev->pci_dev->dev, + "Rx-Adaptive Interrupt moderation %s\n", + (intr_cfg->rx_enable) ? + "enabled" : "disabled"); + + octeon_free_soft_command(oct_dev, sc); + + return ((retval) ? 
-ENODEV : 0); + } + + dev_err(&oct_dev->pci_dev->dev, "iq/oq config failed\n"); + + return -EINTR; } static void @@ -1584,80 +1633,106 @@ static int octnet_get_link_stats(struct net_device *netdev) return 0; } -/* Enable/Disable auto interrupt Moderation */ -static int oct_cfg_adaptive_intr(struct lio *lio, struct ethtool_coalesce - *intr_coal) +static int lio_get_intr_coalesce(struct net_device *netdev, + struct ethtool_coalesce *intr_coal) { - int ret = 0; + struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - struct oct_intrmod_cfg *intrmod_cfg; - - intrmod_cfg = &oct->intrmod; - - if (oct->intrmod.rx_enable || oct->intrmod.tx_enable) { - if (intr_coal->rate_sample_interval) - intrmod_cfg->check_intrvl = - intr_coal->rate_sample_interval; - else - intrmod_cfg->check_intrvl = - LIO_INTRMOD_CHECK_INTERVAL; + struct octeon_instr_queue *iq; + struct oct_intrmod_cfg intrmod_cfg; - if (intr_coal->pkt_rate_high) - intrmod_cfg->maxpkt_ratethr = - intr_coal->pkt_rate_high; - else - intrmod_cfg->maxpkt_ratethr = - LIO_INTRMOD_MAXPKT_RATETHR; + if (octnet_get_intrmod_cfg(lio, &intrmod_cfg)) + return -ENODEV; - if (intr_coal->pkt_rate_low) - intrmod_cfg->minpkt_ratethr = - intr_coal->pkt_rate_low; - else - intrmod_cfg->minpkt_ratethr = - LIO_INTRMOD_MINPKT_RATETHR; + switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + case OCTEON_CN23XX_VF_VID: { + if (!intrmod_cfg.rx_enable) { + intr_coal->rx_coalesce_usecs = oct->rx_coalesce_usecs; + intr_coal->rx_max_coalesced_frames = + oct->rx_max_coalesced_frames; + } + if (!intrmod_cfg.tx_enable) + intr_coal->tx_max_coalesced_frames = + oct->tx_max_coalesced_frames; + break; } - if (oct->intrmod.rx_enable) { - if (intr_coal->rx_max_coalesced_frames_high) - intrmod_cfg->rx_maxcnt_trigger = - intr_coal->rx_max_coalesced_frames_high; - else - intrmod_cfg->rx_maxcnt_trigger = - LIO_INTRMOD_RXMAXCNT_TRIGGER; + case OCTEON_CN68XX: + case OCTEON_CN66XX: { + struct octeon_cn6xxx *cn6xxx = + (struct octeon_cn6xxx *)oct->chip; - if (intr_coal->rx_coalesce_usecs_high) - intrmod_cfg->rx_maxtmr_trigger = - intr_coal->rx_coalesce_usecs_high; - else - intrmod_cfg->rx_maxtmr_trigger = - LIO_INTRMOD_RXMAXTMR_TRIGGER; + if (!intrmod_cfg.rx_enable) { + intr_coal->rx_coalesce_usecs = + CFG_GET_OQ_INTR_TIME(cn6xxx->conf); + intr_coal->rx_max_coalesced_frames = + CFG_GET_OQ_INTR_PKT(cn6xxx->conf); + } + iq = oct->instr_queue[lio->linfo.txpciq[0].s.q_no]; + intr_coal->tx_max_coalesced_frames = iq->fill_threshold; + break; + } + default: + netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n"); + return -EINVAL; + } + if (intrmod_cfg.rx_enable) { + intr_coal->use_adaptive_rx_coalesce = + intrmod_cfg.rx_enable; + intr_coal->rate_sample_interval = + intrmod_cfg.check_intrvl; + intr_coal->pkt_rate_high = + intrmod_cfg.maxpkt_ratethr; + intr_coal->pkt_rate_low = + intrmod_cfg.minpkt_ratethr; + intr_coal->rx_max_coalesced_frames_high = + intrmod_cfg.rx_maxcnt_trigger; + intr_coal->rx_coalesce_usecs_high = + intrmod_cfg.rx_maxtmr_trigger; + intr_coal->rx_coalesce_usecs_low = + intrmod_cfg.rx_mintmr_trigger; + intr_coal->rx_max_coalesced_frames_low = + intrmod_cfg.rx_mincnt_trigger; + } + if ((OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) && + (intrmod_cfg.tx_enable)) { + intr_coal->use_adaptive_tx_coalesce = + intrmod_cfg.tx_enable; + intr_coal->tx_max_coalesced_frames_high = + intrmod_cfg.tx_maxcnt_trigger; + intr_coal->tx_max_coalesced_frames_low = + intrmod_cfg.tx_mincnt_trigger; + } + return 0; +} - if (intr_coal->rx_coalesce_usecs_low) - 
intrmod_cfg->rx_mintmr_trigger = - intr_coal->rx_coalesce_usecs_low; - else - intrmod_cfg->rx_mintmr_trigger = - LIO_INTRMOD_RXMINTMR_TRIGGER; +/* Enable/Disable auto interrupt Moderation */ +static int oct_cfg_adaptive_intr(struct lio *lio, + struct oct_intrmod_cfg *intrmod_cfg, + struct ethtool_coalesce *intr_coal) +{ + int ret = 0; - if (intr_coal->rx_max_coalesced_frames_low) - intrmod_cfg->rx_mincnt_trigger = - intr_coal->rx_max_coalesced_frames_low; - else - intrmod_cfg->rx_mincnt_trigger = - LIO_INTRMOD_RXMINCNT_TRIGGER; + if (intrmod_cfg->rx_enable || intrmod_cfg->tx_enable) { + intrmod_cfg->check_intrvl = intr_coal->rate_sample_interval; + intrmod_cfg->maxpkt_ratethr = intr_coal->pkt_rate_high; + intrmod_cfg->minpkt_ratethr = intr_coal->pkt_rate_low; } - if (oct->intrmod.tx_enable) { - if (intr_coal->tx_max_coalesced_frames_high) - intrmod_cfg->tx_maxcnt_trigger = - intr_coal->tx_max_coalesced_frames_high; - else - intrmod_cfg->tx_maxcnt_trigger = - LIO_INTRMOD_TXMAXCNT_TRIGGER; - if (intr_coal->tx_max_coalesced_frames_low) - intrmod_cfg->tx_mincnt_trigger = - intr_coal->tx_max_coalesced_frames_low; - else - intrmod_cfg->tx_mincnt_trigger = - LIO_INTRMOD_TXMINCNT_TRIGGER; + if (intrmod_cfg->rx_enable) { + intrmod_cfg->rx_maxcnt_trigger = + intr_coal->rx_max_coalesced_frames_high; + intrmod_cfg->rx_maxtmr_trigger = + intr_coal->rx_coalesce_usecs_high; + intrmod_cfg->rx_mintmr_trigger = + intr_coal->rx_coalesce_usecs_low; + intrmod_cfg->rx_mincnt_trigger = + intr_coal->rx_max_coalesced_frames_low; + } + if (intrmod_cfg->tx_enable) { + intrmod_cfg->tx_maxcnt_trigger = + intr_coal->tx_max_coalesced_frames_high; + intrmod_cfg->tx_mincnt_trigger = + intr_coal->tx_max_coalesced_frames_low; } ret = octnet_set_intrmod_cfg(lio, intrmod_cfg); @@ -1666,7 +1741,9 @@ static int oct_cfg_adaptive_intr(struct lio *lio, struct ethtool_coalesce } static int -oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal) +oct_cfg_rx_intrcnt(struct lio *lio, + struct oct_intrmod_cfg *intrmod, + struct ethtool_coalesce *intr_coal) { struct octeon_device *oct = lio->oct_dev; u32 rx_max_coalesced_frames; @@ -1692,7 +1769,7 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal) int q_no; if (!intr_coal->rx_max_coalesced_frames) - rx_max_coalesced_frames = oct->intrmod.rx_frames; + rx_max_coalesced_frames = intrmod->rx_frames; else rx_max_coalesced_frames = intr_coal->rx_max_coalesced_frames; @@ -1703,17 +1780,18 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal) (octeon_read_csr64( oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no)) & (0x3fffff00000000UL)) | - rx_max_coalesced_frames); + (rx_max_coalesced_frames - 1)); /*consider setting resend bit*/ } - oct->intrmod.rx_frames = rx_max_coalesced_frames; + intrmod->rx_frames = rx_max_coalesced_frames; + oct->rx_max_coalesced_frames = rx_max_coalesced_frames; break; } case OCTEON_CN23XX_VF_VID: { int q_no; if (!intr_coal->rx_max_coalesced_frames) - rx_max_coalesced_frames = oct->intrmod.rx_frames; + rx_max_coalesced_frames = intrmod->rx_frames; else rx_max_coalesced_frames = intr_coal->rx_max_coalesced_frames; @@ -1724,9 +1802,10 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal) oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no)) & (0x3fffff00000000UL)) | rx_max_coalesced_frames); - /* consider writing to resend bit here */ + /*consider writing to resend bit here*/ } - oct->intrmod.rx_frames = rx_max_coalesced_frames; + intrmod->rx_frames = rx_max_coalesced_frames; + oct->rx_max_coalesced_frames = 
rx_max_coalesced_frames; break; } default: @@ -1736,6 +1815,7 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal) } static int oct_cfg_rx_intrtime(struct lio *lio, + struct oct_intrmod_cfg *intrmod, struct ethtool_coalesce *intr_coal) { struct octeon_device *oct = lio->oct_dev; @@ -1766,7 +1846,7 @@ static int oct_cfg_rx_intrtime(struct lio *lio, int q_no; if (!intr_coal->rx_coalesce_usecs) - rx_coalesce_usecs = oct->intrmod.rx_usecs; + rx_coalesce_usecs = intrmod->rx_usecs; else rx_coalesce_usecs = intr_coal->rx_coalesce_usecs; time_threshold = @@ -1775,11 +1855,12 @@ static int oct_cfg_rx_intrtime(struct lio *lio, q_no += oct->sriov_info.pf_srn; octeon_write_csr64(oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), - (oct->intrmod.rx_frames | - (time_threshold << 32))); + (intrmod->rx_frames | + ((u64)time_threshold << 32))); /*consider writing to resend bit here*/ } - oct->intrmod.rx_usecs = rx_coalesce_usecs; + intrmod->rx_usecs = rx_coalesce_usecs; + oct->rx_coalesce_usecs = rx_coalesce_usecs; break; } case OCTEON_CN23XX_VF_VID: { @@ -1787,7 +1868,7 @@ static int oct_cfg_rx_intrtime(struct lio *lio, int q_no; if (!intr_coal->rx_coalesce_usecs) - rx_coalesce_usecs = oct->intrmod.rx_usecs; + rx_coalesce_usecs = intrmod->rx_usecs; else rx_coalesce_usecs = intr_coal->rx_coalesce_usecs; @@ -1796,11 +1877,12 @@ static int oct_cfg_rx_intrtime(struct lio *lio, for (q_no = 0; q_no < oct->num_oqs; q_no++) { octeon_write_csr64( oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no), - (oct->intrmod.rx_frames | - (time_threshold << 32))); - /* consider setting resend bit */ + (intrmod->rx_frames | + ((u64)time_threshold << 32))); + /*consider setting resend bit*/ } - oct->intrmod.rx_usecs = rx_coalesce_usecs; + intrmod->rx_usecs = rx_coalesce_usecs; + oct->rx_coalesce_usecs = rx_coalesce_usecs; break; } default: @@ -1811,8 +1893,9 @@ static int oct_cfg_rx_intrtime(struct lio *lio, } static int -oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal - __attribute__((unused))) +oct_cfg_tx_intrcnt(struct lio *lio, + struct oct_intrmod_cfg *intrmod, + struct ethtool_coalesce *intr_coal) { struct octeon_device *oct = lio->oct_dev; u32 iq_intr_pkt; @@ -1839,12 +1922,13 @@ oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal val = readq(inst_cnt_reg); /*clear wmark and count.dont want to write count back*/ val = (val & 0xFFFF000000000000ULL) | - ((u64)iq_intr_pkt + ((u64)(iq_intr_pkt - 1) << CN23XX_PKT_IN_DONE_WMARK_BIT_POS); writeq(val, inst_cnt_reg); /*consider setting resend bit*/ } - oct->intrmod.tx_frames = iq_intr_pkt; + intrmod->tx_frames = iq_intr_pkt; + oct->tx_max_coalesced_frames = iq_intr_pkt; break; } default: @@ -1859,6 +1943,7 @@ static int lio_set_intr_coalesce(struct net_device *netdev, struct lio *lio = GET_LIO(netdev); int ret; struct octeon_device *oct = lio->oct_dev; + struct oct_intrmod_cfg intrmod = {0}; u32 j, q_no; int db_max, db_min; @@ -1877,8 +1962,8 @@ static int lio_set_intr_coalesce(struct net_device *netdev, } else { dev_err(&oct->pci_dev->dev, "LIQUIDIO: Invalid tx-frames:%d. Range is min:%d max:%d\n", - intr_coal->tx_max_coalesced_frames, db_min, - db_max); + intr_coal->tx_max_coalesced_frames, + db_min, db_max); return -EINVAL; } break; @@ -1889,24 +1974,36 @@ static int lio_set_intr_coalesce(struct net_device *netdev, return -EINVAL; } - oct->intrmod.rx_enable = intr_coal->use_adaptive_rx_coalesce ? 1 : 0; - oct->intrmod.tx_enable = intr_coal->use_adaptive_tx_coalesce ? 
1 : 0; + intrmod.rx_enable = intr_coal->use_adaptive_rx_coalesce ? 1 : 0; + intrmod.tx_enable = intr_coal->use_adaptive_tx_coalesce ? 1 : 0; + intrmod.rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct)); + intrmod.rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct)); + intrmod.tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct)); - ret = oct_cfg_adaptive_intr(lio, intr_coal); + ret = oct_cfg_adaptive_intr(lio, &intrmod, intr_coal); if (!intr_coal->use_adaptive_rx_coalesce) { - ret = oct_cfg_rx_intrtime(lio, intr_coal); + ret = oct_cfg_rx_intrtime(lio, &intrmod, intr_coal); if (ret) goto ret_intrmod; - ret = oct_cfg_rx_intrcnt(lio, intr_coal); + ret = oct_cfg_rx_intrcnt(lio, &intrmod, intr_coal); if (ret) goto ret_intrmod; + } else { + oct->rx_coalesce_usecs = + CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct)); + oct->rx_max_coalesced_frames = + CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct)); } + if (!intr_coal->use_adaptive_tx_coalesce) { - ret = oct_cfg_tx_intrcnt(lio, intr_coal); + ret = oct_cfg_tx_intrcnt(lio, &intrmod, intr_coal); if (ret) goto ret_intrmod; + } else { + oct->tx_max_coalesced_frames = + CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct)); } return 0; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 92f46b1375c3..927617cbf6a9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -16,6 +16,7 @@ * NONINFRINGEMENT. See the GNU General Public License for more details. ***********************************************************************/ #include <linux/module.h> +#include <linux/interrupt.h> #include <linux/pci.h> #include <linux/firmware.h> #include <net/vxlan.h> @@ -60,12 +61,6 @@ MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. 
Default \"nic\""); static int ptp_enable = 1; -/* Bit mask values for lio->ifstate */ -#define LIO_IFSTATE_DROQ_OPS 0x01 -#define LIO_IFSTATE_REGISTERED 0x02 -#define LIO_IFSTATE_RUNNING 0x04 -#define LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08 - /* Polling interval for determining when NIC application is alive */ #define LIQUIDIO_STARTER_POLL_INTERVAL_MS 100 @@ -178,6 +173,8 @@ static int liquidio_stop(struct net_device *netdev); static void liquidio_remove(struct pci_dev *pdev); static int liquidio_probe(struct pci_dev *pdev, const struct pci_device_id *ent); +static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx, + int linkstate); static struct handshake handshake[MAX_OCTEON_DEVICES]; static struct completion first_stage; @@ -531,36 +528,6 @@ static void liquidio_deinit_pci(void) } /** - * \brief check interface state - * @param lio per-network private data - * @param state_flag flag state to check - */ -static inline int ifstate_check(struct lio *lio, int state_flag) -{ - return atomic_read(&lio->ifstate) & state_flag; -} - -/** - * \brief set interface state - * @param lio per-network private data - * @param state_flag flag state to set - */ -static inline void ifstate_set(struct lio *lio, int state_flag) -{ - atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) | state_flag)); -} - -/** - * \brief clear interface state - * @param lio per-network private data - * @param state_flag flag state to clear - */ -static inline void ifstate_reset(struct lio *lio, int state_flag) -{ - atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) & ~(state_flag))); -} - -/** * \brief Stop Tx queues * @param netdev network device */ @@ -748,7 +715,8 @@ static void delete_glists(struct lio *lio) kfree(g); } while (g); - if (lio->glists_virt_base && lio->glists_virt_base[i]) { + if (lio->glists_virt_base && lio->glists_virt_base[i] && + lio->glists_dma_base && lio->glists_dma_base[i]) { lio_dma_free(lio->oct_dev, lio->glist_entry_size * lio->tx_qsize, lio->glists_virt_base[i], @@ -805,7 +773,7 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) } for (i = 0; i < num_iqs; i++) { - int numa_node = cpu_to_node(i % num_online_cpus()); + int numa_node = dev_to_node(&oct->pci_dev->dev); spin_lock_init(&lio->glist_lock[i]); @@ -967,14 +935,13 @@ static void update_txq_status(struct octeon_device *oct, int iq_num) INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num, tx_restart, 1); netif_wake_subqueue(netdev, iq->q_index); - } else { - if (!octnet_iq_is_full(oct, lio->txq)) { - INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, - lio->txq, - tx_restart, 1); - wake_q(netdev, lio->txq); - } } + } else if (netif_queue_stopped(netdev) && + lio->linfo.link.s.link_up && + (!octnet_iq_is_full(oct, lio->txq))) { + INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, + lio->txq, tx_restart, 1); + netif_wake_queue(netdev); } } @@ -1084,16 +1051,35 @@ static int octeon_setup_interrupt(struct octeon_device *oct) int i; int num_ioq_vectors; int num_alloc_ioq_vectors; + char *queue_irq_names = NULL; + char *aux_irq_name = NULL; if (OCTEON_CN23XX_PF(oct) && oct->msix_on) { oct->num_msix_irqs = oct->sriov_info.num_pf_rings; /* one non ioq interrupt for handling sli_mac_pf_int_sum */ oct->num_msix_irqs += 1; + /* allocate storage for the names assigned to each irq */ + oct->irq_name_storage = + kcalloc((MAX_IOQ_INTERRUPTS_PER_PF + 1), INTRNAMSIZ, + GFP_KERNEL); + if (!oct->irq_name_storage) { + dev_err(&oct->pci_dev->dev, "Irq name storage alloc failed...\n"); + return -ENOMEM; + } + + queue_irq_names = 
oct->irq_name_storage; + aux_irq_name = &queue_irq_names + [IRQ_NAME_OFF(MAX_IOQ_INTERRUPTS_PER_PF)]; + oct->msix_entries = kcalloc( oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL); - if (!oct->msix_entries) - return 1; + if (!oct->msix_entries) { + dev_err(&oct->pci_dev->dev, "Memory Alloc failed...\n"); + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; + return -ENOMEM; + } msix_entries = (struct msix_entry *)oct->msix_entries; /*Assumption is that pf msix vectors start from pf srn to pf to @@ -1111,7 +1097,9 @@ static int octeon_setup_interrupt(struct octeon_device *oct) dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n"); kfree(oct->msix_entries); oct->msix_entries = NULL; - return 1; + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; + return num_alloc_ioq_vectors; } dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n"); @@ -1119,9 +1107,12 @@ static int octeon_setup_interrupt(struct octeon_device *oct) /** For PF, there is one non-ioq interrupt handler */ num_ioq_vectors -= 1; + + snprintf(aux_irq_name, INTRNAMSIZ, + "LiquidIO%u-pf%u-aux", oct->octeon_id, oct->pf_num); irqret = request_irq(msix_entries[num_ioq_vectors].vector, - liquidio_legacy_intr_handler, 0, "octeon", - oct); + liquidio_legacy_intr_handler, 0, + aux_irq_name, oct); if (irqret) { dev_err(&oct->pci_dev->dev, "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n", @@ -1129,13 +1120,20 @@ static int octeon_setup_interrupt(struct octeon_device *oct) pci_disable_msix(oct->pci_dev); kfree(oct->msix_entries); oct->msix_entries = NULL; - return 1; + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; + return irqret; } for (i = 0; i < num_ioq_vectors; i++) { + snprintf(&queue_irq_names[IRQ_NAME_OFF(i)], INTRNAMSIZ, + "LiquidIO%u-pf%u-rxtx-%u", + oct->octeon_id, oct->pf_num, i); + irqret = request_irq(msix_entries[i].vector, liquidio_msix_intr_handler, 0, - "octeon", &oct->ioq_vector[i]); + &queue_irq_names[IRQ_NAME_OFF(i)], + &oct->ioq_vector[i]); if (irqret) { dev_err(&oct->pci_dev->dev, "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n", @@ -1155,7 +1153,9 @@ static int octeon_setup_interrupt(struct octeon_device *oct) pci_disable_msix(oct->pci_dev); kfree(oct->msix_entries); oct->msix_entries = NULL; - return 1; + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; + return irqret; } oct->ioq_vector[i].vector = msix_entries[i].vector; /* assign the cpu mask for this msix interrupt vector */ @@ -1173,111 +1173,150 @@ static int octeon_setup_interrupt(struct octeon_device *oct) else oct->flags |= LIO_FLAG_MSI_ENABLED; + /* allocate storage for the names assigned to the irq */ + oct->irq_name_storage = kcalloc(1, INTRNAMSIZ, GFP_KERNEL); + if (!oct->irq_name_storage) + return -ENOMEM; + + queue_irq_names = oct->irq_name_storage; + + snprintf(&queue_irq_names[IRQ_NAME_OFF(0)], INTRNAMSIZ, + "LiquidIO%u-pf%u-rxtx-%u", + oct->octeon_id, oct->pf_num, 0); + irqret = request_irq(oct->pci_dev->irq, - liquidio_legacy_intr_handler, IRQF_SHARED, - "octeon", oct); + liquidio_legacy_intr_handler, + IRQF_SHARED, + &queue_irq_names[IRQ_NAME_OFF(0)], oct); if (irqret) { if (oct->flags & LIO_FLAG_MSI_ENABLED) pci_disable_msi(oct->pci_dev); dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n", irqret); - return 1; + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; + return irqret; } } return 0; } +static struct octeon_device *get_other_octeon_device(struct octeon_device *oct) +{ + struct 
octeon_device *other_oct; + + other_oct = lio_get_device(oct->octeon_id + 1); + + if (other_oct && other_oct->pci_dev) { + int oct_busnum, other_oct_busnum; + + oct_busnum = oct->pci_dev->bus->number; + other_oct_busnum = other_oct->pci_dev->bus->number; + + if (oct_busnum == other_oct_busnum) { + int oct_slot, other_oct_slot; + + oct_slot = PCI_SLOT(oct->pci_dev->devfn); + other_oct_slot = PCI_SLOT(other_oct->pci_dev->devfn); + + if (oct_slot == other_oct_slot) + return other_oct; + } + } + + return NULL; +} + +static void disable_all_vf_links(struct octeon_device *oct) +{ + struct net_device *netdev; + int max_vfs, vf, i; + + if (!oct) + return; + + max_vfs = oct->sriov_info.max_vfs; + + for (i = 0; i < oct->ifcount; i++) { + netdev = oct->props[i].netdev; + if (!netdev) + continue; + + for (vf = 0; vf < max_vfs; vf++) + liquidio_set_vf_link_state(netdev, vf, + IFLA_VF_LINK_STATE_DISABLE); + } +} + static int liquidio_watchdog(void *param) { - u64 wdog; - u16 mask_of_stuck_cores = 0; - u16 mask_of_crashed_cores = 0; - int core_num; - u8 core_is_stuck[LIO_MAX_CORES]; - u8 core_crashed[LIO_MAX_CORES]; + bool err_msg_was_printed[LIO_MAX_CORES]; + u16 mask_of_crashed_or_stuck_cores = 0; + bool all_vf_links_are_disabled = false; struct octeon_device *oct = param; + struct octeon_device *other_oct; +#ifdef CONFIG_MODULE_UNLOAD + long refcount, vfs_referencing_pf; + u64 vfs_mask1, vfs_mask2; +#endif + int core; - memset(core_is_stuck, 0, sizeof(core_is_stuck)); - memset(core_crashed, 0, sizeof(core_crashed)); + memset(err_msg_was_printed, 0, sizeof(err_msg_was_printed)); while (!kthread_should_stop()) { - mask_of_crashed_cores = + /* sleep for a couple of seconds so that we don't hog the CPU */ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(msecs_to_jiffies(2000)); + + mask_of_crashed_or_stuck_cores = (u16)octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2); - for (core_num = 0; core_num < LIO_MAX_CORES; core_num++) { - if (!core_is_stuck[core_num]) { - wdog = lio_pci_readq(oct, CIU3_WDOG(core_num)); - - /* look at watchdog state field */ - wdog &= CIU3_WDOG_MASK; - if (wdog) { - /* this watchdog timer has expired */ - core_is_stuck[core_num] = - LIO_MONITOR_WDOG_EXPIRE; - mask_of_stuck_cores |= (1 << core_num); - } - } + if (!mask_of_crashed_or_stuck_cores) + continue; - if (!core_crashed[core_num]) - core_crashed[core_num] = - (mask_of_crashed_cores >> core_num) & 1; - } + WRITE_ONCE(oct->cores_crashed, true); + other_oct = get_other_octeon_device(oct); + if (other_oct) + WRITE_ONCE(other_oct->cores_crashed, true); - if (mask_of_stuck_cores) { - for (core_num = 0; core_num < LIO_MAX_CORES; - core_num++) { - if (core_is_stuck[core_num] == 1) { - dev_err(&oct->pci_dev->dev, - "ERROR: Octeon core %d is stuck!\n", - core_num); - /* 2 means we have printk'd an error - * so no need to repeat the same printk - */ - core_is_stuck[core_num] = - LIO_MONITOR_CORE_STUCK_MSGD; - } - } - } + for (core = 0; core < LIO_MAX_CORES; core++) { + bool core_crashed_or_got_stuck; - if (mask_of_crashed_cores) { - for (core_num = 0; core_num < LIO_MAX_CORES; - core_num++) { - if (core_crashed[core_num] == 1) { - dev_err(&oct->pci_dev->dev, - "ERROR: Octeon core %d crashed! 
See oct-fwdump for details.\n", - core_num); - /* 2 means we have printk'd an error - * so no need to repeat the same printk - */ - core_crashed[core_num] = - LIO_MONITOR_CORE_STUCK_MSGD; - } + core_crashed_or_got_stuck = + (mask_of_crashed_or_stuck_cores + >> core) & 1; + + if (core_crashed_or_got_stuck && + !err_msg_was_printed[core]) { + dev_err(&oct->pci_dev->dev, + "ERROR: Octeon core %d crashed or got stuck! See oct-fwdump for details.\n", + core); + err_msg_was_printed[core] = true; } } + + if (all_vf_links_are_disabled) + continue; + + disable_all_vf_links(oct); + disable_all_vf_links(other_oct); + all_vf_links_are_disabled = true; + #ifdef CONFIG_MODULE_UNLOAD - if (mask_of_stuck_cores || mask_of_crashed_cores) { - /* make module refcount=0 so that rmmod will work */ - long refcount; + vfs_mask1 = READ_ONCE(oct->sriov_info.vf_drv_loaded_mask); + vfs_mask2 = READ_ONCE(other_oct->sriov_info.vf_drv_loaded_mask); - refcount = module_refcount(THIS_MODULE); + vfs_referencing_pf = hweight64(vfs_mask1); + vfs_referencing_pf += hweight64(vfs_mask2); - while (refcount > 0) { + refcount = module_refcount(THIS_MODULE); + if (refcount >= vfs_referencing_pf) { + while (vfs_referencing_pf) { module_put(THIS_MODULE); - refcount = module_refcount(THIS_MODULE); - } - - /* compensate for and withstand an unlikely (but still - * possible) race condition - */ - while (refcount < 0) { - try_module_get(THIS_MODULE); - refcount = module_refcount(THIS_MODULE); + vfs_referencing_pf--; } } #endif - /* sleep for two seconds */ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(2 * HZ); } return 0; @@ -1369,6 +1408,12 @@ liquidio_probe(struct pci_dev *pdev, return 0; } +static bool fw_type_is_none(void) +{ + return strncmp(fw_type, LIO_FW_NAME_TYPE_NONE, + sizeof(LIO_FW_NAME_TYPE_NONE)) == 0; +} + /** *\brief Destroy resources associated with octeon device * @param pdev PCI device structure @@ -1449,6 +1494,9 @@ static void octeon_destroy_resources(struct octeon_device *oct) pci_disable_msi(oct->pci_dev); } + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; + /* fallthrough */ case OCT_DEV_MSIX_ALLOC_VECTOR_DONE: if (OCTEON_CN23XX_PF(oct)) @@ -1508,9 +1556,12 @@ static void octeon_destroy_resources(struct octeon_device *oct) /* fallthrough */ case OCT_DEV_PCI_MAP_DONE: - /* Soft reset the octeon device before exiting */ - if ((!OCTEON_CN23XX_PF(oct)) || !oct->octeon_id) - oct->fn_list.soft_reset(oct); + if (!fw_type_is_none()) { + /* Soft reset the octeon device before exiting */ + if (!OCTEON_CN23XX_PF(oct) || + (OCTEON_CN23XX_PF(oct) && !oct->octeon_id)) + oct->fn_list.soft_reset(oct); + } octeon_unmap_pci_barx(oct, 0); octeon_unmap_pci_barx(oct, 1); @@ -1643,6 +1694,15 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) liquidio_stop(netdev); + if (fw_type_is_none()) { + struct octnic_ctrl_pkt nctrl; + + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + nctrl.ncmd.s.cmd = OCTNET_CMD_RESET_PF; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + octnet_send_nic_ctrl_pkt(oct, &nctrl); + } + if (oct->props[lio->ifidx].napi_enabled == 1) { list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) napi_disable(napi); @@ -1658,6 +1718,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) cleanup_link_status_change_wq(netdev); + cleanup_rx_oom_poll_fn(netdev); + delete_glists(lio); free_netdev(netdev); @@ -2126,8 +2188,7 @@ static int load_firmware(struct octeon_device *oct) 
char fw_name[LIO_MAX_FW_FILENAME_LEN]; char *tmp_fw_type; - if (strncmp(fw_type, LIO_FW_NAME_TYPE_NONE, - sizeof(LIO_FW_NAME_TYPE_NONE)) == 0) { + if (fw_type_is_none()) { dev_info(&oct->pci_dev->dev, "Skipping firmware load\n"); return ret; } @@ -2211,8 +2272,8 @@ static void if_cfg_callback(struct octeon_device *oct, oct = lio_get_device(ctx->octeon_id); if (resp->status) - dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n", - CVM_CAST64(resp->status)); + dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: 0x%llx (0x%08x)\n", + CVM_CAST64(resp->status), status); WRITE_ONCE(ctx->cond, 1); snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s", @@ -2437,8 +2498,11 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget) /* Flush the instruction queue */ iq = oct->instr_queue[iq_no]; if (iq) { - /* Process iq buffers with in the budget limits */ - tx_done = octeon_flush_iq(oct, iq, budget); + if (atomic_read(&iq->instr_pending)) + /* Process iq buffers with in the budget limits */ + tx_done = octeon_flush_iq(oct, iq, budget); + else + tx_done = 1; /* Update iq read-index rather than waiting for next interrupt. * Return back if tx_done is false. */ @@ -2555,6 +2619,15 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev, __func__); return 1; } + + if (octeon_dev->ioq_vector) { + struct octeon_ioq_vector *ioq_vector; + + ioq_vector = &octeon_dev->ioq_vector[q]; + netif_set_xps_queue(netdev, + &ioq_vector->affinity_mask, + ioq_vector->iq_index); + } } return 0; @@ -3426,6 +3499,8 @@ static int liquidio_set_rxcsum_command(struct net_device *netdev, int command, struct octnic_ctrl_pkt nctrl; int ret = 0; + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + nctrl.ncmd.u64 = 0; nctrl.ncmd.s.cmd = command; nctrl.ncmd.s.param1 = rx_cmd; @@ -3459,6 +3534,8 @@ static int liquidio_vxlan_port_command(struct net_device *netdev, int command, struct octnic_ctrl_pkt nctrl; int ret = 0; + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + nctrl.ncmd.u64 = 0; nctrl.ncmd.s.cmd = command; nctrl.ncmd.s.more = vxlan_cmd_bit; @@ -3596,7 +3673,8 @@ static int __liquidio_set_vf_mac(struct net_device *netdev, int vfidx, nctrl.ncmd.s.param2 = (is_admin_assigned ? 
1 : 0); nctrl.ncmd.s.more = 1; nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; - nctrl.cb_fn = 0; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; nctrl.wait_time = LIO_CMD_WAIT_TM; nctrl.udd[0] = 0; @@ -4122,6 +4200,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) if (setup_link_status_change_wq(netdev)) goto setup_nic_dev_fail; + if (setup_rx_oom_poll_fn(netdev)) + goto setup_nic_dev_fail; + /* Register the network device with the OS */ if (register_netdev(netdev)) { dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n"); @@ -4271,7 +4352,6 @@ static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs) */ static int liquidio_init_nic_module(struct octeon_device *oct) { - struct oct_intrmod_cfg *intrmod_cfg; int i, retval = 0; int num_nic_ports = CFG_GET_NUM_NIC_PORTS(octeon_get_conf(oct)); @@ -4296,22 +4376,6 @@ static int liquidio_init_nic_module(struct octeon_device *oct) liquidio_ptp_init(oct); - /* Initialize interrupt moderation params */ - intrmod_cfg = &((struct octeon_device *)oct)->intrmod; - intrmod_cfg->rx_enable = 1; - intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL; - intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR; - intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR; - intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER; - intrmod_cfg->rx_maxtmr_trigger = LIO_INTRMOD_RXMAXTMR_TRIGGER; - intrmod_cfg->rx_mintmr_trigger = LIO_INTRMOD_RXMINTMR_TRIGGER; - intrmod_cfg->rx_mincnt_trigger = LIO_INTRMOD_RXMINCNT_TRIGGER; - intrmod_cfg->tx_enable = 1; - intrmod_cfg->tx_maxcnt_trigger = LIO_INTRMOD_TXMAXCNT_TRIGGER; - intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER; - intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct)); - intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct)); - intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct)); dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n"); return retval; @@ -4373,6 +4437,7 @@ octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf) struct octeon_device *oct = (struct octeon_device *)buf; struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt; int i, notice, vf_idx; + bool cores_crashed; u64 *data, vf_num; notice = recv_pkt->rh.r.ossp; @@ -4383,19 +4448,23 @@ octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf) octeon_swap_8B_data(&vf_num, 1); vf_idx = (int)vf_num - 1; + cores_crashed = READ_ONCE(oct->cores_crashed); + if (notice == VF_DRV_LOADED) { if (!(oct->sriov_info.vf_drv_loaded_mask & BIT_ULL(vf_idx))) { oct->sriov_info.vf_drv_loaded_mask |= BIT_ULL(vf_idx); dev_info(&oct->pci_dev->dev, "driver for VF%d was loaded\n", vf_idx); - try_module_get(THIS_MODULE); + if (!cores_crashed) + try_module_get(THIS_MODULE); } } else if (notice == VF_DRV_REMOVED) { if (oct->sriov_info.vf_drv_loaded_mask & BIT_ULL(vf_idx)) { oct->sriov_info.vf_drv_loaded_mask &= ~BIT_ULL(vf_idx); dev_info(&oct->pci_dev->dev, "driver for VF%d was removed\n", vf_idx); - module_put(THIS_MODULE); + if (!cores_crashed) + module_put(THIS_MODULE); } } else if (notice == VF_DRV_MACADDR_CHANGED) { u8 *b = (u8 *)&data[1]; @@ -4447,14 +4516,16 @@ static int octeon_device_init(struct octeon_device *octeon_dev) if (OCTEON_CN23XX_PF(octeon_dev)) { if (!cn23xx_fw_loaded(octeon_dev)) { fw_loaded = 0; - /* Do a soft reset of the Octeon device. 
*/ - if (octeon_dev->fn_list.soft_reset(octeon_dev)) - return 1; - /* things might have changed */ - if (!cn23xx_fw_loaded(octeon_dev)) - fw_loaded = 0; - else - fw_loaded = 1; + if (!fw_type_is_none()) { + /* Do a soft reset of the Octeon device. */ + if (octeon_dev->fn_list.soft_reset(octeon_dev)) + return 1; + /* things might have changed */ + if (!cn23xx_fw_loaded(octeon_dev)) + fw_loaded = 0; + else + fw_loaded = 1; + } } else { fw_loaded = 1; } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 7b83be4ce1fe..34c77821fad9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -16,6 +16,7 @@ * NONINFRINGEMENT. See the GNU General Public License for more details. ***********************************************************************/ #include <linux/module.h> +#include <linux/interrupt.h> #include <linux/pci.h> #include <net/vxlan.h> #include "liquidio_common.h" @@ -39,12 +40,6 @@ MODULE_PARM_DESC(debug, "NETIF_MSG debug bits"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) -/* Bit mask values for lio->ifstate */ -#define LIO_IFSTATE_DROQ_OPS 0x01 -#define LIO_IFSTATE_REGISTERED 0x02 -#define LIO_IFSTATE_RUNNING 0x04 -#define LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08 - struct liquidio_if_cfg_context { int octeon_id; @@ -336,36 +331,6 @@ static struct pci_driver liquidio_vf_pci_driver = { }; /** - * \brief check interface state - * @param lio per-network private data - * @param state_flag flag state to check - */ -static int ifstate_check(struct lio *lio, int state_flag) -{ - return atomic_read(&lio->ifstate) & state_flag; -} - -/** - * \brief set interface state - * @param lio per-network private data - * @param state_flag flag state to set - */ -static void ifstate_set(struct lio *lio, int state_flag) -{ - atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) | state_flag)); -} - -/** - * \brief clear interface state - * @param lio per-network private data - * @param state_flag flag state to clear - */ -static void ifstate_reset(struct lio *lio, int state_flag) -{ - atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) & ~(state_flag))); -} - -/** * \brief Stop Tx queues * @param netdev network device */ @@ -506,7 +471,8 @@ static void delete_glists(struct lio *lio) kfree(g); } while (g); - if (lio->glists_virt_base && lio->glists_virt_base[i]) { + if (lio->glists_virt_base && lio->glists_virt_base[i] && + lio->glists_dma_base && lio->glists_dma_base[i]) { lio_dma_free(lio->oct_dev, lio->glist_entry_size * lio->tx_qsize, lio->glists_virt_base[i], @@ -722,13 +688,12 @@ static void update_txq_status(struct octeon_device *oct, int iq_num) netif_wake_subqueue(netdev, iq->q_index); INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num, tx_restart, 1); - } else { - if (!octnet_iq_is_full(oct, lio->txq)) { - INCR_INSTRQUEUE_PKT_COUNT( - lio->oct_dev, lio->txq, tx_restart, 1); - wake_q(netdev, lio->txq); - } } + } else if (netif_queue_stopped(netdev) && lio->linfo.link.s.link_up && + (!octnet_iq_is_full(oct, lio->txq))) { + INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, + lio->txq, tx_restart, 1); + netif_wake_queue(netdev); } } @@ -780,6 +745,7 @@ liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev) static int octeon_setup_interrupt(struct octeon_device *oct) { struct msix_entry *msix_entries; + char *queue_irq_names = NULL; int num_alloc_ioq_vectors; int num_ioq_vectors; int irqret; @@ -788,10 +754,25 @@ static int 
octeon_setup_interrupt(struct octeon_device *oct) if (oct->msix_on) { oct->num_msix_irqs = oct->sriov_info.rings_per_vf; + /* allocate storage for the names assigned to each irq */ + oct->irq_name_storage = + kcalloc(MAX_IOQ_INTERRUPTS_PER_VF, INTRNAMSIZ, + GFP_KERNEL); + if (!oct->irq_name_storage) { + dev_err(&oct->pci_dev->dev, "Irq name storage alloc failed...\n"); + return -ENOMEM; + } + + queue_irq_names = oct->irq_name_storage; + oct->msix_entries = kcalloc( oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL); - if (!oct->msix_entries) - return 1; + if (!oct->msix_entries) { + dev_err(&oct->pci_dev->dev, "Memory Alloc failed...\n"); + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; + return -ENOMEM; + } msix_entries = (struct msix_entry *)oct->msix_entries; @@ -805,16 +786,23 @@ static int octeon_setup_interrupt(struct octeon_device *oct) dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n"); kfree(oct->msix_entries); oct->msix_entries = NULL; - return 1; + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; + return num_alloc_ioq_vectors; } dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n"); num_ioq_vectors = oct->num_msix_irqs; for (i = 0; i < num_ioq_vectors; i++) { + snprintf(&queue_irq_names[IRQ_NAME_OFF(i)], INTRNAMSIZ, + "LiquidIO%u-vf%u-rxtx-%u", + oct->octeon_id, oct->vf_num, i); + irqret = request_irq(msix_entries[i].vector, liquidio_msix_intr_handler, 0, - "octeon", &oct->ioq_vector[i]); + &queue_irq_names[IRQ_NAME_OFF(i)], + &oct->ioq_vector[i]); if (irqret) { dev_err(&oct->pci_dev->dev, "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n", @@ -830,7 +818,9 @@ static int octeon_setup_interrupt(struct octeon_device *oct) pci_disable_msix(oct->pci_dev); kfree(oct->msix_entries); oct->msix_entries = NULL; - return 1; + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; + return irqret; } oct->ioq_vector[i].vector = msix_entries[i].vector; /* assign the cpu mask for this msix interrupt vector */ @@ -975,6 +965,8 @@ static void octeon_destroy_resources(struct octeon_device *oct) pci_disable_msix(oct->pci_dev); kfree(oct->msix_entries); oct->msix_entries = NULL; + kfree(oct->irq_name_storage); + oct->irq_name_storage = NULL; } /* Soft reset the octeon device before exiting */ if (oct->pci_dev->reset_fn) @@ -1163,6 +1155,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) unregister_netdev(netdev); + cleanup_rx_oom_poll_fn(netdev); + cleanup_link_status_change_wq(netdev); delete_glists(lio); @@ -1642,8 +1636,12 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget) /* Flush the instruction queue */ iq = oct->instr_queue[iq_no]; if (iq) { - /* Process iq buffers with in the budget limits */ - tx_done = octeon_flush_iq(oct, iq, budget); + if (atomic_read(&iq->instr_pending)) + /* Process iq buffers with in the budget limits */ + tx_done = octeon_flush_iq(oct, iq, budget); + else + tx_done = 1; + /* Update iq read-index rather than waiting for next interrupt. * Return back if tx_done is false. 
*/ @@ -2486,6 +2484,8 @@ liquidio_vlan_rx_add_vid(struct net_device *netdev, struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; struct octnic_ctrl_pkt nctrl; + struct completion compl; + u16 response_code; int ret = 0; memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); @@ -2497,14 +2497,25 @@ liquidio_vlan_rx_add_vid(struct net_device *netdev, nctrl.wait_time = 100; nctrl.netpndev = (u64)netdev; nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + init_completion(&compl); + nctrl.completion = &compl; + nctrl.response_code = &response_code; ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); if (ret < 0) { dev_err(&oct->pci_dev->dev, "Add VLAN filter failed in core (ret: 0x%x)\n", ret); + return -EIO; } - return ret; + if (!wait_for_completion_timeout(&compl, + msecs_to_jiffies(nctrl.wait_time))) + return -EPERM; + + if (READ_ONCE(response_code)) + return -EPERM; + + return 0; } static int @@ -2549,6 +2560,8 @@ static int liquidio_set_rxcsum_command(struct net_device *netdev, int command, struct octnic_ctrl_pkt nctrl; int ret = 0; + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + nctrl.ncmd.u64 = 0; nctrl.ncmd.s.cmd = command; nctrl.ncmd.s.param1 = rx_cmd; @@ -2581,6 +2594,8 @@ static int liquidio_vxlan_port_command(struct net_device *netdev, int command, struct octnic_ctrl_pkt nctrl; int ret = 0; + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + nctrl.ncmd.u64 = 0; nctrl.ncmd.s.cmd = command; nctrl.ncmd.s.more = vxlan_cmd_bit; @@ -3003,6 +3018,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) if (setup_link_status_change_wq(netdev)) goto setup_nic_dev_fail; + if (setup_rx_oom_poll_fn(netdev)) + goto setup_nic_dev_fail; + /* Register the network device with the OS */ if (register_netdev(netdev)) { dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n"); @@ -3057,7 +3075,6 @@ setup_nic_wait_intr: */ static int liquidio_init_nic_module(struct octeon_device *oct) { - struct oct_intrmod_cfg *intrmod_cfg; int num_nic_ports = 1; int i, retval = 0; @@ -3079,22 +3096,6 @@ static int liquidio_init_nic_module(struct octeon_device *oct) goto octnet_init_failure; } - /* Initialize interrupt moderation params */ - intrmod_cfg = &((struct octeon_device *)oct)->intrmod; - intrmod_cfg->rx_enable = 1; - intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL; - intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR; - intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR; - intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER; - intrmod_cfg->rx_maxtmr_trigger = LIO_INTRMOD_RXMAXTMR_TRIGGER; - intrmod_cfg->rx_mintmr_trigger = LIO_INTRMOD_RXMINTMR_TRIGGER; - intrmod_cfg->rx_mincnt_trigger = LIO_INTRMOD_RXMINCNT_TRIGGER; - intrmod_cfg->tx_enable = 1; - intrmod_cfg->tx_maxcnt_trigger = LIO_INTRMOD_TXMAXCNT_TRIGGER; - intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER; - intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct)); - intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct)); - intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct)); dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n"); return retval; diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 294c6f3c6b48..8ea2323d8d67 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -27,7 +27,7 @@ #define LIQUIDIO_PACKAGE "" #define LIQUIDIO_BASE_MAJOR_VERSION 1 -#define 
LIQUIDIO_BASE_MINOR_VERSION 4 +#define LIQUIDIO_BASE_MINOR_VERSION 5 #define LIQUIDIO_BASE_MICRO_VERSION 1 #define LIQUIDIO_BASE_VERSION __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \ __stringify(LIQUIDIO_BASE_MINOR_VERSION) @@ -83,6 +83,7 @@ enum octeon_tag_type { #define OPCODE_NIC_INTRMOD_CFG 0x08 #define OPCODE_NIC_IF_CFG 0x09 #define OPCODE_NIC_VF_DRV_NOTICE 0x0A +#define OPCODE_NIC_INTRMOD_PARAMS 0x0B #define VF_DRV_LOADED 1 #define VF_DRV_REMOVED -1 #define VF_DRV_MACADDR_CHANGED 2 @@ -100,6 +101,11 @@ enum octeon_tag_type { #define BYTES_PER_DHLEN_UNIT 8 #define MAX_REG_CNT 2000000U +#define INTRNAMSIZ 32 +#define IRQ_NAME_OFF(i) ((i) * INTRNAMSIZ) +#define MAX_IOQ_INTERRUPTS_PER_PF (64 * 2) +#define MAX_IOQ_INTERRUPTS_PER_VF (8 * 2) + static inline u32 incr_index(u32 index, u32 count, u32 max) { @@ -181,6 +187,7 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry, #define OCTNET_CMD_Q 0 /* NIC Command types */ +#define OCTNET_CMD_RESET_PF 0x0 #define OCTNET_CMD_CHANGE_MTU 0x1 #define OCTNET_CMD_CHANGE_MACADDR 0x2 #define OCTNET_CMD_CHANGE_DEVFLAGS 0x3 @@ -845,29 +852,6 @@ struct oct_mdio_cmd { #define OCT_LINK_STATS_SIZE (sizeof(struct oct_link_stats)) -/* intrmod: max. packet rate threshold */ -#define LIO_INTRMOD_MAXPKT_RATETHR 196608 -/* intrmod: min. packet rate threshold */ -#define LIO_INTRMOD_MINPKT_RATETHR 9216 -/* intrmod: max. packets to trigger interrupt */ -#define LIO_INTRMOD_RXMAXCNT_TRIGGER 384 -/* intrmod: min. packets to trigger interrupt */ -#define LIO_INTRMOD_RXMINCNT_TRIGGER 0 -/* intrmod: max. time to trigger interrupt */ -#define LIO_INTRMOD_RXMAXTMR_TRIGGER 128 -/* 66xx:intrmod: min. time to trigger interrupt - * (value of 1 is optimum for TCP_RR) - */ -#define LIO_INTRMOD_RXMINTMR_TRIGGER 1 - -/* intrmod: max. packets to trigger interrupt */ -#define LIO_INTRMOD_TXMAXCNT_TRIGGER 64 -/* intrmod: min. 
packets to trigger interrupt */ -#define LIO_INTRMOD_TXMINCNT_TRIGGER 0 - -/* intrmod: poll interval in seconds */ -#define LIO_INTRMOD_CHECK_INTERVAL 1 - struct oct_intrmod_cfg { u64 rx_enable; u64 tx_enable; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 9675ffbf25e6..e21b477d0159 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -793,7 +793,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct) u32 num_descs = 0; u32 iq_no = 0; union oct_txpciq txpciq; - int numa_node = cpu_to_node(iq_no % num_online_cpus()); + int numa_node = dev_to_node(&oct->pci_dev->dev); if (OCTEON_CN6XXX(oct)) num_descs = @@ -837,7 +837,7 @@ int octeon_setup_output_queues(struct octeon_device *oct) u32 num_descs = 0; u32 desc_size = 0; u32 oq_no = 0; - int numa_node = cpu_to_node(oq_no % num_online_cpus()); + int numa_node = dev_to_node(&oct->pci_dev->dev); if (OCTEON_CN6XXX(oct)) { num_descs = diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index c301a3852482..92f67de111aa 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -453,9 +453,6 @@ struct octeon_device { /** List of dispatch functions */ struct octeon_dispatch_list dispatch; - /* Interrupt Moderation */ - struct oct_intrmod_cfg intrmod; - u32 int_status; u64 droq_intr; @@ -517,6 +514,9 @@ struct octeon_device { void *msix_entries; + /* when requesting IRQs, the names are stored here */ + void *irq_name_storage; + struct octeon_sriov_info sriov_info; struct octeon_pf_vf_hs_word pfvf_hsword; @@ -538,6 +538,12 @@ struct octeon_device { u32 priv_flags; void *watchdog_task; + + u32 rx_coalesce_usecs; + u32 rx_max_coalesced_frames; + u32 tx_max_coalesced_frames; + + bool cores_crashed; }; #define OCT_DRV_ONLINE 1 @@ -551,12 +557,6 @@ struct octeon_device { #define CHIP_CONF(oct, TYPE) \ (((struct octeon_ ## TYPE *)((oct)->chip))->conf) -struct oct_intrmod_cmd { - struct octeon_device *oct_dev; - struct octeon_soft_command *sc; - struct oct_intrmod_cfg *cfg; -}; - /*------------------ Function Prototypes ----------------------*/ /** Initialize device list memory */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 79f809479af6..286be5539cef 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -226,8 +226,7 @@ int octeon_init_droq(struct octeon_device *oct, struct octeon_droq *droq; u32 desc_ring_size = 0, c_num_descs = 0, c_buf_size = 0; u32 c_pkts_per_intr = 0, c_refill_threshold = 0; - int orig_node = dev_to_node(&oct->pci_dev->dev); - int numa_node = cpu_to_node(q_no % num_online_cpus()); + int numa_node = dev_to_node(&oct->pci_dev->dev); dev_dbg(&oct->pci_dev->dev, "%s[%d]\n", __func__, q_no); @@ -267,13 +266,8 @@ int octeon_init_droq(struct octeon_device *oct, droq->buffer_size = c_buf_size; desc_ring_size = droq->max_count * OCT_DROQ_DESC_SIZE; - set_dev_node(&oct->pci_dev->dev, numa_node); droq->desc_ring = lio_dma_alloc(oct, desc_ring_size, (dma_addr_t *)&droq->desc_ring_dma); - set_dev_node(&oct->pci_dev->dev, orig_node); - if (!droq->desc_ring) - droq->desc_ring = lio_dma_alloc(oct, desc_ring_size, - (dma_addr_t *)&droq->desc_ring_dma); if (!droq->desc_ring) { dev_err(&oct->pci_dev->dev, @@ -519,6 
+513,32 @@ octeon_droq_refill(struct octeon_device *octeon_dev, struct octeon_droq *droq) return desc_refilled; } +/** check if we can allocate packets to get out of oom. + * @param droq - Droq being checked. + * @return does not return anything + */ +void octeon_droq_check_oom(struct octeon_droq *droq) +{ + int desc_refilled; + struct octeon_device *oct = droq->oct_dev; + + if (readl(droq->pkts_credit_reg) <= CN23XX_SLI_DEF_BP) { + spin_lock_bh(&droq->lock); + desc_refilled = octeon_droq_refill(oct, droq); + if (desc_refilled) { + /* Flush the droq descriptor data to memory to be sure + * that when we update the credits the data in memory + * is accurate. + */ + wmb(); + writel(desc_refilled, droq->pkts_credit_reg); + /* make sure mmio write completes */ + mmiowb(); + } + spin_unlock_bh(&droq->lock); + } +} + static inline u32 octeon_droq_get_bufcount(u32 buf_size, u32 total_len) { @@ -970,7 +990,7 @@ int octeon_create_droq(struct octeon_device *oct, u32 desc_size, void *app_ctx) { struct octeon_droq *droq; - int numa_node = cpu_to_node(q_no % num_online_cpus()); + int numa_node = dev_to_node(&oct->pci_dev->dev); if (oct->droq[q_no]) { dev_dbg(&oct->pci_dev->dev, "Droq already in use. Cannot create droq %d again\n", diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index 6982c0af5ecc..9781577115e7 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h @@ -426,4 +426,6 @@ int octeon_droq_process_packets(struct octeon_device *oct, int octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd, u32 arg); +void octeon_droq_check_oom(struct octeon_droq *droq); + #endif /*__OCTEON_DROQ_H__ */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index 4608a5af35a3..5063a12613e5 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -152,7 +152,7 @@ struct octeon_instr_queue { struct oct_iq_stats stats; /** DMA mapped base address of the input descriptor ring. 
*/ - u64 base_addr_dma; + dma_addr_t base_addr_dma; /** Application context */ void *app_ctx; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c index 201b9875f9bb..5cca73b8880b 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c @@ -313,6 +313,7 @@ int octeon_mbox_process_message(struct octeon_mbox *mbox) return 0; } + spin_unlock_irqrestore(&mbox->lock, flags); WARN_ON(1); return 0; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index eef2a1e8a7e3..bf483932ff25 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -28,6 +28,12 @@ #define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) #define LIO_MIN_MTU_SIZE ETH_MIN_MTU +/* Bit mask values for lio->ifstate */ +#define LIO_IFSTATE_DROQ_OPS 0x01 +#define LIO_IFSTATE_REGISTERED 0x02 +#define LIO_IFSTATE_RUNNING 0x04 +#define LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08 + struct oct_nic_stats_resp { u64 rh; struct oct_link_stats stats; @@ -123,6 +129,9 @@ struct lio { /* work queue for txq status */ struct cavium_wq txq_status_wq; + /* work queue for rxq oom status */ + struct cavium_wq rxq_status_wq; + /* work queue for link status */ struct cavium_wq link_status_wq; @@ -132,10 +141,6 @@ struct lio { #define LIO_SIZE (sizeof(struct lio)) #define GET_LIO(netdev) ((struct lio *)netdev_priv(netdev)) -#define CIU3_WDOG(c) (0x1010000020000ULL + ((c) << 3)) -#define CIU3_WDOG_MASK 12ULL -#define LIO_MONITOR_WDOG_EXPIRE 1 -#define LIO_MONITOR_CORE_STUCK_MSGD 2 #define LIO_MAX_CORES 12 /** @@ -146,6 +151,10 @@ struct lio { */ int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1); +int setup_rx_oom_poll_fn(struct net_device *netdev); + +void cleanup_rx_oom_poll_fn(struct net_device *netdev); + /** * \brief Link control command completion callback * @param nctrl_ptr pointer to control packet structure @@ -438,4 +447,34 @@ static inline void octeon_fast_packet_next(struct octeon_droq *droq, get_rbd(droq->recv_buf_list[idx].buffer), copy_len); } +/** + * \brief check interface state + * @param lio per-network private data + * @param state_flag flag state to check + */ +static inline int ifstate_check(struct lio *lio, int state_flag) +{ + return atomic_read(&lio->ifstate) & state_flag; +} + +/** + * \brief set interface state + * @param lio per-network private data + * @param state_flag flag state to set + */ +static inline void ifstate_set(struct lio *lio, int state_flag) +{ + atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) | state_flag)); +} + +/** + * \brief clear interface state + * @param lio per-network private data + * @param state_flag flag state to clear + */ +static inline void ifstate_reset(struct lio *lio, int state_flag) +{ + atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) & ~(state_flag))); +} + #endif diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index 0243be8dd56f..b457cf23fce6 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -100,14 +100,16 @@ static void octnet_link_ctrl_callback(struct octeon_device *oct, nctrl = (struct octnic_ctrl_pkt *)sc->ctxptr; - /* Call the callback function if status is OK. 
- * Status is OK only if a response was expected and core returned - * success. + /* Call the callback function if status is zero (meaning OK) or status + * contains a firmware status code bigger than zero (meaning the + * firmware is reporting an error). * If no response was expected, status is OK if the command was posted * successfully. */ - if (!status && nctrl->cb_fn) + if ((!status || status > FIRMWARE_STATUS_CODE(0)) && nctrl->cb_fn) { + nctrl->status = status; nctrl->cb_fn(nctrl); + } octeon_free_soft_command(oct, sc); } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h index 0c7a5c9b2932..6480ef863441 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h @@ -62,6 +62,10 @@ struct octnic_ctrl_pkt { /** Callback function called when the command has been fetched */ octnic_ctrl_pkt_cb_fn_t cb_fn; + + u32 status; + u16 *response_code; + struct completion *completion; }; #define MAX_UDD_SIZE(nctrl) (sizeof((nctrl)->udd)) diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 707bc15adec6..261f448f9de2 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -62,8 +62,7 @@ int octeon_init_instr_queue(struct octeon_device *oct, u32 iq_no = (u32)txpciq.s.q_no; u32 q_size; struct cavium_wq *db_wq; - int orig_node = dev_to_node(&oct->pci_dev->dev); - int numa_node = cpu_to_node(iq_no % num_online_cpus()); + int numa_node = dev_to_node(&oct->pci_dev->dev); if (OCTEON_CN6XXX(oct)) conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx))); @@ -91,13 +90,7 @@ int octeon_init_instr_queue(struct octeon_device *oct, iq->oct_dev = oct; - set_dev_node(&oct->pci_dev->dev, numa_node); - iq->base_addr = lio_dma_alloc(oct, q_size, - (dma_addr_t *)&iq->base_addr_dma); - set_dev_node(&oct->pci_dev->dev, orig_node); - if (!iq->base_addr) - iq->base_addr = lio_dma_alloc(oct, q_size, - (dma_addr_t *)&iq->base_addr_dma); + iq->base_addr = lio_dma_alloc(oct, q_size, &iq->base_addr_dma); if (!iq->base_addr) { dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n", iq_no); @@ -211,7 +204,7 @@ int octeon_setup_iq(struct octeon_device *oct, void *app_ctx) { u32 iq_no = (u32)txpciq.s.q_no; - int numa_node = cpu_to_node(iq_no % num_online_cpus()); + int numa_node = dev_to_node(&oct->pci_dev->dev); if (oct->instr_queue[iq_no]) { dev_dbg(&oct->pci_dev->dev, "IQ is in use. 
Cannot create the IQ: %d again\n", diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c index 2fbaae96b505..3d691c69f74d 100644 --- a/drivers/net/ethernet/cavium/liquidio/response_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c @@ -69,50 +69,53 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev, int resp_to_process = MAX_ORD_REQS_TO_PROCESS; u32 status; u64 status64; - struct octeon_instr_rdp *rdp; - u64 rptr; ordered_sc_list = &octeon_dev->response_list[OCTEON_ORDERED_SC_LIST]; do { spin_lock_bh(&ordered_sc_list->lock); - if (ordered_sc_list->head.next == &ordered_sc_list->head) { + if (list_empty(&ordered_sc_list->head)) { spin_unlock_bh(&ordered_sc_list->lock); return 1; } - sc = (struct octeon_soft_command *)ordered_sc_list-> - head.next; - if (OCTEON_CN23XX_PF(octeon_dev) || - OCTEON_CN23XX_VF(octeon_dev)) { - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; - rptr = sc->cmd.cmd3.rptr; - } else { - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - rptr = sc->cmd.cmd2.rptr; - } + sc = list_first_entry(&ordered_sc_list->head, + struct octeon_soft_command, node); status = OCTEON_REQUEST_PENDING; /* check if octeon has finished DMA'ing a response * to where rptr is pointing to */ - dma_sync_single_for_cpu(&octeon_dev->pci_dev->dev, - rptr, rdp->rlen, - DMA_FROM_DEVICE); status64 = *sc->status_word; if (status64 != COMPLETION_WORD_INIT) { + /* This logic ensures that all 64b have been written. + * 1. check byte 0 for non-FF + * 2. if non-FF, then swap result from BE to host order + * 3. check byte 7 (swapped to 0) for non-FF + * 4. if non-FF, use the low 32-bit status code + * 5. if either byte 0 or byte 7 is FF, don't use status + */ if ((status64 & 0xff) != 0xff) { octeon_swap_8B_data(&status64, 1); if (((status64 & 0xff) != 0xff)) { - status = (u32)(status64 & - 0xffffffffULL); + /* retrieve 16-bit firmware status */ + status = (u32)(status64 & 0xffffULL); + if (status) { + status = + FIRMWARE_STATUS_CODE(status); + } else { + /* i.e. no error */ + status = OCTEON_REQUEST_DONE; + } } } } else if (force_quit || (sc->timeout && time_after(jiffies, (unsigned long)sc->timeout))) { + dev_err(&octeon_dev->pci_dev->dev, "%s: cmd failed, timeout (%ld, %ld)\n", + __func__, (long)jiffies, (long)sc->timeout); status = OCTEON_REQUEST_TIMEOUT; } diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.h b/drivers/net/ethernet/cavium/liquidio/response_manager.h index cbb2d84e8932..9169c2815dba 100644 --- a/drivers/net/ethernet/cavium/liquidio/response_manager.h +++ b/drivers/net/ethernet/cavium/liquidio/response_manager.h @@ -78,6 +78,8 @@ enum { /*------------ Error codes used by host driver -----------------*/ #define DRIVER_MAJOR_ERROR_CODE 0x0000 +/*------ Error codes used by firmware (bits 15..0 set by firmware */ +#define FIRMWARE_MAJOR_ERROR_CODE 0x0001 /** A value of 0x00000000 indicates no error i.e. success */ #define DRIVER_ERROR_NONE 0x00000000 @@ -116,6 +118,9 @@ enum { }; +#define FIRMWARE_STATUS_CODE(status) \ + ((FIRMWARE_MAJOR_ERROR_CODE << 16) | (status)) + /** Initialize the response lists. The number of response lists to create is * given by count. * @param octeon_dev - the octeon device structure. 
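Illustrative sketch (not part of the patch): how the liquidio changes above distinguish firmware-reported errors from driver errors. The response manager now keeps only the low 16 bits of the completion status word and wraps them with FIRMWARE_MAJOR_ERROR_CODE, and the control-packet callback fires for success or any firmware-wrapped status. The constants mirror response_manager.h above; the decode helper and the standalone program are assumptions for demonstration only.

#include <stdio.h>
#include <stdint.h>

#define DRIVER_MAJOR_ERROR_CODE   0x0000
#define FIRMWARE_MAJOR_ERROR_CODE 0x0001
#define FIRMWARE_STATUS_CODE(status) \
	((FIRMWARE_MAJOR_ERROR_CODE << 16) | (status))

/* hypothetical helper: recover the 16-bit firmware status from a wrapped code */
static uint16_t firmware_status(uint32_t status)
{
	return (uint16_t)(status & 0xffff);
}

int main(void)
{
	/* the low 16 bits of the soft-command status word carry the firmware code */
	uint32_t wrapped = FIRMWARE_STATUS_CODE(0x0005);

	/* octnet_link_ctrl_callback() now runs the callback when status is 0
	 * (success) or greater than FIRMWARE_STATUS_CODE(0), i.e. a firmware error
	 */
	if (wrapped == 0 || wrapped > FIRMWARE_STATUS_CODE(0))
		printf("callback runs, firmware status = 0x%x\n",
		       firmware_status(wrapped));
	return 0;
}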
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 2269ff562d95..4a02e618e318 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -252,12 +252,14 @@ struct nicvf_drv_stats { u64 tx_csum_overflow; /* driver debug stats */ - u64 rcv_buffer_alloc_failures; u64 tx_tso; u64 tx_timeout; u64 txq_stop; u64 txq_wake; + u64 rcv_buffer_alloc_failures; + u64 page_alloc; + struct u64_stats_sync syncp; }; @@ -266,9 +268,9 @@ struct nicvf { struct net_device *netdev; struct pci_dev *pdev; void __iomem *reg_base; + struct bpf_prog *xdp_prog; #define MAX_QUEUES_PER_QSET 8 struct queue_set *qs; - struct nicvf_cq_poll *napi[8]; void *iommu_domain; u8 vf_id; u8 sqs_id; @@ -294,6 +296,7 @@ struct nicvf { /* Queue count */ u8 rx_queues; u8 tx_queues; + u8 xdp_tx_queues; u8 max_queues; u8 node; @@ -318,10 +321,11 @@ struct nicvf { struct nicvf_drv_stats __percpu *drv_stats; struct bgx_stats bgx_stats; + /* Napi */ + struct nicvf_cq_poll *napi[8]; + /* MSI-X */ - bool msix_enabled; u8 num_vec; - struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; char irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15]; bool irq_allocated[NIC_VF_MSIX_VECTORS]; cpumask_var_t affinity_mask[NIC_VF_MSIX_VECTORS]; diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 767234e2e8f9..fb770b0182d3 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -65,9 +65,7 @@ struct nicpf { bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; /* MSI-X */ - bool msix_enabled; u8 num_vec; - struct msix_entry *msix_entries; bool irq_allocated[NIC_PF_MSIX_VECTORS]; char irq_name[NIC_PF_MSIX_VECTORS][20]; }; @@ -1088,7 +1086,7 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq) u64 intr; u8 vf, vf_per_mbx_reg = 64; - if (irq == nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector) + if (irq == pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0)) mbx = 0; else mbx = 1; @@ -1107,51 +1105,13 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq) return IRQ_HANDLED; } -static int nic_enable_msix(struct nicpf *nic) -{ - int i, ret; - - nic->num_vec = pci_msix_vec_count(nic->pdev); - - nic->msix_entries = kmalloc_array(nic->num_vec, - sizeof(struct msix_entry), - GFP_KERNEL); - if (!nic->msix_entries) - return -ENOMEM; - - for (i = 0; i < nic->num_vec; i++) - nic->msix_entries[i].entry = i; - - ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); - if (ret) { - dev_err(&nic->pdev->dev, - "Request for #%d msix vectors failed, returned %d\n", - nic->num_vec, ret); - kfree(nic->msix_entries); - return ret; - } - - nic->msix_enabled = 1; - return 0; -} - -static void nic_disable_msix(struct nicpf *nic) -{ - if (nic->msix_enabled) { - pci_disable_msix(nic->pdev); - kfree(nic->msix_entries); - nic->msix_enabled = 0; - nic->num_vec = 0; - } -} - static void nic_free_all_interrupts(struct nicpf *nic) { int irq; for (irq = 0; irq < nic->num_vec; irq++) { if (nic->irq_allocated[irq]) - free_irq(nic->msix_entries[irq].vector, nic); + free_irq(pci_irq_vector(nic->pdev, irq), nic); nic->irq_allocated[irq] = false; } } @@ -1159,18 +1119,24 @@ static void nic_free_all_interrupts(struct nicpf *nic) static int nic_register_interrupts(struct nicpf *nic) { int i, ret; + nic->num_vec = pci_msix_vec_count(nic->pdev); /* Enable MSI-X */ - ret = nic_enable_msix(nic); - if (ret) - return ret; + ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, 
nic->num_vec, + PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&nic->pdev->dev, + "Request for #%d msix vectors failed, returned %d\n", + nic->num_vec, ret); + return 1; + } /* Register mailbox interrupt handler */ for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) { sprintf(nic->irq_name[i], "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0)); - ret = request_irq(nic->msix_entries[i].vector, + ret = request_irq(pci_irq_vector(nic->pdev, i), nic_mbx_intr_handler, 0, nic->irq_name[i], nic); if (ret) @@ -1186,14 +1152,16 @@ static int nic_register_interrupts(struct nicpf *nic) fail: dev_err(&nic->pdev->dev, "Request irq failed\n"); nic_free_all_interrupts(nic); - nic_disable_msix(nic); + pci_free_irq_vectors(nic->pdev); + nic->num_vec = 0; return ret; } static void nic_unregister_interrupts(struct nicpf *nic) { nic_free_all_interrupts(nic); - nic_disable_msix(nic); + pci_free_irq_vectors(nic->pdev); + nic->num_vec = 0; } static int nic_num_sqs_en(struct nicpf *nic, int vf_en) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index 02a986cdbb39..b9ece9cbf98b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -100,11 +100,12 @@ static const struct nicvf_stat nicvf_drv_stats[] = { NICVF_DRV_STAT(tx_csum_overlap), NICVF_DRV_STAT(tx_csum_overflow), - NICVF_DRV_STAT(rcv_buffer_alloc_failures), NICVF_DRV_STAT(tx_tso), NICVF_DRV_STAT(tx_timeout), NICVF_DRV_STAT(txq_stop), NICVF_DRV_STAT(txq_wake), + NICVF_DRV_STAT(rcv_buffer_alloc_failures), + NICVF_DRV_STAT(page_alloc), }; static const struct nicvf_stat nicvf_queue_stats[] = { @@ -720,7 +721,7 @@ static int nicvf_set_channels(struct net_device *dev, struct nicvf *nic = netdev_priv(dev); int err = 0; bool if_up = netif_running(dev); - int cqcount; + u8 cqcount, txq_count; if (!channel->rx_count || !channel->tx_count) return -EINVAL; @@ -729,10 +730,26 @@ static int nicvf_set_channels(struct net_device *dev, if (channel->tx_count > nic->max_queues) return -EINVAL; + if (nic->xdp_prog && + ((channel->tx_count + channel->rx_count) > nic->max_queues)) { + netdev_err(nic->netdev, + "XDP mode, RXQs + TXQs > Max %d\n", + nic->max_queues); + return -EINVAL; + } + if (if_up) nicvf_stop(dev); - cqcount = max(channel->rx_count, channel->tx_count); + nic->rx_queues = channel->rx_count; + nic->tx_queues = channel->tx_count; + if (!nic->xdp_prog) + nic->xdp_tx_queues = 0; + else + nic->xdp_tx_queues = channel->rx_count; + + txq_count = nic->xdp_tx_queues + nic->tx_queues; + cqcount = max(nic->rx_queues, txq_count); if (cqcount > MAX_CMP_QUEUES_PER_QS) { nic->sqs_count = roundup(cqcount, MAX_CMP_QUEUES_PER_QS); @@ -741,12 +758,10 @@ static int nicvf_set_channels(struct net_device *dev, nic->sqs_count = 0; } - nic->qs->rq_cnt = min_t(u32, channel->rx_count, MAX_RCV_QUEUES_PER_QS); - nic->qs->sq_cnt = min_t(u32, channel->tx_count, MAX_SND_QUEUES_PER_QS); + nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS); + nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS); nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt); - nic->rx_queues = channel->rx_count; - nic->tx_queues = channel->tx_count; err = nicvf_set_real_num_queues(dev, nic->tx_queues, nic->rx_queues); if (err) return err; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 24017588f531..d6477af88085 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ 
b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -17,6 +17,9 @@ #include <linux/prefetch.h> #include <linux/irq.h> #include <linux/iommu.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/filter.h> #include "nic_reg.h" #include "nic.h" @@ -397,8 +400,10 @@ static void nicvf_request_sqs(struct nicvf *nic) if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS) rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS; - if (nic->tx_queues > MAX_SND_QUEUES_PER_QS) - tx_queues = nic->tx_queues - MAX_SND_QUEUES_PER_QS; + + tx_queues = nic->tx_queues + nic->xdp_tx_queues; + if (tx_queues > MAX_SND_QUEUES_PER_QS) + tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS; /* Set no of Rx/Tx queues in each of the SQsets */ for (sqs = 0; sqs < nic->sqs_count; sqs++) { @@ -496,12 +501,99 @@ static int nicvf_init_resources(struct nicvf *nic) return 0; } +static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, + struct cqe_rx_t *cqe_rx, struct snd_queue *sq, + struct sk_buff **skb) +{ + struct xdp_buff xdp; + struct page *page; + u32 action; + u16 len, offset = 0; + u64 dma_addr, cpu_addr; + void *orig_data; + + /* Retrieve packet buffer's DMA address and length */ + len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64)))); + dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64)))); + + cpu_addr = nicvf_iova_to_phys(nic, dma_addr); + if (!cpu_addr) + return false; + cpu_addr = (u64)phys_to_virt(cpu_addr); + page = virt_to_page((void *)cpu_addr); + + xdp.data_hard_start = page_address(page); + xdp.data = (void *)cpu_addr; + xdp.data_end = xdp.data + len; + orig_data = xdp.data; + + rcu_read_lock(); + action = bpf_prog_run_xdp(prog, &xdp); + rcu_read_unlock(); + + /* Check if XDP program has changed headers */ + if (orig_data != xdp.data) { + len = xdp.data_end - xdp.data; + offset = orig_data - xdp.data; + dma_addr -= offset; + } + + switch (action) { + case XDP_PASS: + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. + */ + if (page_ref_count(page) == 1) { + dma_addr &= PAGE_MASK; + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, + RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } + + /* Build SKB and pass on packet to network stack */ + *skb = build_skb(xdp.data, + RCV_FRAG_LEN - cqe_rx->align_pad + offset); + if (!*skb) + put_page(page); + else + skb_put(*skb, len); + return false; + case XDP_TX: + nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len); + return true; + default: + bpf_warn_invalid_xdp_action(action); + case XDP_ABORTED: + trace_xdp_exception(nic->netdev, prog, action); + case XDP_DROP: + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. 
+ */ + if (page_ref_count(page) == 1) { + dma_addr &= PAGE_MASK; + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, + RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } + put_page(page); + return true; + } + return false; +} + static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cqe_send_t *cqe_tx, - int cqe_type, int budget, + int budget, int *subdesc_cnt, unsigned int *tx_pkts, unsigned int *tx_bytes) { struct sk_buff *skb = NULL; + struct page *page; struct nicvf *nic = netdev_priv(netdev); struct snd_queue *sq; struct sq_hdr_subdesc *hdr; @@ -513,12 +605,26 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) return; - netdev_dbg(nic->netdev, - "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n", - __func__, cqe_tx->sq_qs, cqe_tx->sq_idx, - cqe_tx->sqe_ptr, hdr->subdesc_cnt); + /* Check for errors */ + if (cqe_tx->send_status) + nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx); + + /* Is this a XDP designated Tx queue */ + if (sq->is_xdp) { + page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr]; + /* Check if it's recycled page or else unmap DMA mapping */ + if (page && (page_ref_count(page) == 1)) + nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, + hdr->subdesc_cnt); + + /* Release page reference for recycling */ + if (page) + put_page(page); + sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL; + *subdesc_cnt += hdr->subdesc_cnt + 1; + return; + } - nicvf_check_cqe_tx_errs(nic, cqe_tx); skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; if (skb) { /* Check for dummy descriptor used for HW TSO offload on 88xx */ @@ -528,12 +634,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, tso_sqe->subdesc_cnt); - nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1); + *subdesc_cnt += tso_sqe->subdesc_cnt + 1; } else { nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, hdr->subdesc_cnt); } - nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + *subdesc_cnt += hdr->subdesc_cnt + 1; prefetch(skb); (*tx_pkts)++; *tx_bytes += skb->len; @@ -544,7 +650,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, * a SKB attached, so just free SQEs here. */ if (!nic->hw_tso) - nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + *subdesc_cnt += hdr->subdesc_cnt + 1; } } @@ -578,9 +684,9 @@ static inline void nicvf_set_rxhash(struct net_device *netdev, static void nicvf_rcv_pkt_handler(struct net_device *netdev, struct napi_struct *napi, - struct cqe_rx_t *cqe_rx) + struct cqe_rx_t *cqe_rx, struct snd_queue *sq) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct nicvf *nic = netdev_priv(netdev); struct nicvf *snic = nic; int err = 0; @@ -595,16 +701,25 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, } /* Check for errors */ - err = nicvf_check_cqe_rx_errs(nic, cqe_rx); - if (err && !cqe_rx->rb_cnt) - return; + if (cqe_rx->err_level || cqe_rx->err_opcode) { + err = nicvf_check_cqe_rx_errs(nic, cqe_rx); + if (err && !cqe_rx->rb_cnt) + return; + } - skb = nicvf_get_rcv_skb(snic, cqe_rx); - if (!skb) { - netdev_dbg(nic->netdev, "Packet not received\n"); - return; + /* For XDP, ignore pkts spanning multiple pages */ + if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) { + /* Packet consumed by XDP */ + if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb)) + return; + } else { + skb = nicvf_get_rcv_skb(snic, cqe_rx, + nic->xdp_prog ? 
true : false); } + if (!skb) + return; + if (netif_msg_pktdata(nic)) { netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name, skb, skb->len); @@ -646,13 +761,14 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, { int processed_cqe, work_done = 0, tx_done = 0; int cqe_count, cqe_head; + int subdesc_cnt = 0; struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; struct netdev_queue *txq; - struct snd_queue *sq; - unsigned int tx_pkts = 0, tx_bytes = 0; + struct snd_queue *sq = &qs->sq[cq_idx]; + unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx; spin_lock_bh(&cq->lock); loop: @@ -667,8 +783,6 @@ loop: cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; cqe_head &= 0xFFFF; - netdev_dbg(nic->netdev, "%s CQ%d cqe_count %d cqe_head %d\n", - __func__, cq_idx, cqe_count, cqe_head); while (processed_cqe < cqe_count) { /* Get the CQ descriptor */ cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); @@ -682,17 +796,15 @@ loop: break; } - netdev_dbg(nic->netdev, "CQ%d cq_desc->cqe_type %d\n", - cq_idx, cq_desc->cqe_type); switch (cq_desc->cqe_type) { case CQE_TYPE_RX: - nicvf_rcv_pkt_handler(netdev, napi, cq_desc); + nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq); work_done++; break; case CQE_TYPE_SEND: - nicvf_snd_pkt_handler(netdev, - (void *)cq_desc, CQE_TYPE_SEND, - budget, &tx_pkts, &tx_bytes); + nicvf_snd_pkt_handler(netdev, (void *)cq_desc, + budget, &subdesc_cnt, + &tx_pkts, &tx_bytes); tx_done++; break; case CQE_TYPE_INVALID: @@ -704,9 +816,6 @@ loop: } processed_cqe++; } - netdev_dbg(nic->netdev, - "%s CQ%d processed_cqe %d work_done %d budget %d\n", - __func__, cq_idx, processed_cqe, work_done, budget); /* Ring doorbell to inform H/W to reuse processed CQEs */ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, @@ -716,13 +825,26 @@ loop: goto loop; done: + /* Update SQ's descriptor free count */ + if (subdesc_cnt) + nicvf_put_sq_desc(sq, subdesc_cnt); + + txq_idx = nicvf_netdev_qidx(nic, cq_idx); + /* Handle XDP TX queues */ + if (nic->pnicvf->xdp_prog) { + if (txq_idx < nic->pnicvf->xdp_tx_queues) { + nicvf_xdp_sq_doorbell(nic, sq, cq_idx); + goto out; + } + nic = nic->pnicvf; + txq_idx -= nic->pnicvf->xdp_tx_queues; + } + /* Wakeup TXQ if its stopped earlier due to SQ full */ - sq = &nic->qs->sq[cq_idx]; if (tx_done || (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) { netdev = nic->pnicvf->netdev; - txq = netdev_get_tx_queue(netdev, - nicvf_netdev_qidx(nic, cq_idx)); + txq = netdev_get_tx_queue(netdev, txq_idx); if (tx_pkts) netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); @@ -735,10 +857,11 @@ done: if (netif_msg_tx_err(nic)) netdev_warn(netdev, "%s: Transmit queue wakeup SQ%d\n", - netdev->name, cq_idx); + netdev->name, txq_idx); } } +out: spin_unlock_bh(&cq->lock); return work_done; } @@ -882,38 +1005,9 @@ static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq) return IRQ_HANDLED; } -static int nicvf_enable_msix(struct nicvf *nic) -{ - int ret, vec; - - nic->num_vec = NIC_VF_MSIX_VECTORS; - - for (vec = 0; vec < nic->num_vec; vec++) - nic->msix_entries[vec].entry = vec; - - ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); - if (ret) { - netdev_err(nic->netdev, - "Req for #%d msix vectors failed\n", nic->num_vec); - return 0; - } - nic->msix_enabled = 1; - return 1; -} - -static void nicvf_disable_msix(struct nicvf *nic) -{ - if (nic->msix_enabled) { - pci_disable_msix(nic->pdev); - nic->msix_enabled = 0; 
- nic->num_vec = 0; - } -} - static void nicvf_set_irq_affinity(struct nicvf *nic) { int vec, cpu; - int irqnum; for (vec = 0; vec < nic->num_vec; vec++) { if (!nic->irq_allocated[vec]) @@ -930,15 +1024,14 @@ static void nicvf_set_irq_affinity(struct nicvf *nic) cpumask_set_cpu(cpumask_local_spread(cpu, nic->node), nic->affinity_mask[vec]); - irqnum = nic->msix_entries[vec].vector; - irq_set_affinity_hint(irqnum, nic->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec), + nic->affinity_mask[vec]); } } static int nicvf_register_interrupts(struct nicvf *nic) { int irq, ret = 0; - int vector; for_each_cq_irq(irq) sprintf(nic->irq_name[irq], "%s-rxtx-%d", @@ -957,8 +1050,8 @@ static int nicvf_register_interrupts(struct nicvf *nic) /* Register CQ interrupts */ for (irq = 0; irq < nic->qs->cq_cnt; irq++) { - vector = nic->msix_entries[irq].vector; - ret = request_irq(vector, nicvf_intr_handler, + ret = request_irq(pci_irq_vector(nic->pdev, irq), + nicvf_intr_handler, 0, nic->irq_name[irq], nic->napi[irq]); if (ret) goto err; @@ -968,8 +1061,8 @@ static int nicvf_register_interrupts(struct nicvf *nic) /* Register RBDR interrupt */ for (irq = NICVF_INTR_ID_RBDR; irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) { - vector = nic->msix_entries[irq].vector; - ret = request_irq(vector, nicvf_rbdr_intr_handler, + ret = request_irq(pci_irq_vector(nic->pdev, irq), + nicvf_rbdr_intr_handler, 0, nic->irq_name[irq], nic); if (ret) goto err; @@ -981,7 +1074,7 @@ static int nicvf_register_interrupts(struct nicvf *nic) nic->pnicvf->netdev->name, nic->sqs_mode ? (nic->sqs_id + 1) : 0); irq = NICVF_INTR_ID_QS_ERR; - ret = request_irq(nic->msix_entries[irq].vector, + ret = request_irq(pci_irq_vector(nic->pdev, irq), nicvf_qs_err_intr_handler, 0, nic->irq_name[irq], nic); if (ret) @@ -1001,6 +1094,7 @@ err: static void nicvf_unregister_interrupts(struct nicvf *nic) { + struct pci_dev *pdev = nic->pdev; int irq; /* Free registered interrupts */ @@ -1008,19 +1102,20 @@ static void nicvf_unregister_interrupts(struct nicvf *nic) if (!nic->irq_allocated[irq]) continue; - irq_set_affinity_hint(nic->msix_entries[irq].vector, NULL); + irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL); free_cpumask_var(nic->affinity_mask[irq]); if (irq < NICVF_INTR_ID_SQ) - free_irq(nic->msix_entries[irq].vector, nic->napi[irq]); + free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]); else - free_irq(nic->msix_entries[irq].vector, nic); + free_irq(pci_irq_vector(pdev, irq), nic); nic->irq_allocated[irq] = false; } /* Disable MSI-X */ - nicvf_disable_msix(nic); + pci_free_irq_vectors(pdev); + nic->num_vec = 0; } /* Initialize MSIX vectors and register MISC interrupt. 
@@ -1032,16 +1127,22 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) int irq = NICVF_INTR_ID_MISC; /* Return if mailbox interrupt is already registered */ - if (nic->msix_enabled) + if (nic->pdev->msix_enabled) return 0; /* Enable MSI-X */ - if (!nicvf_enable_msix(nic)) + nic->num_vec = pci_msix_vec_count(nic->pdev); + ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec, + PCI_IRQ_MSIX); + if (ret < 0) { + netdev_err(nic->netdev, + "Req for #%d msix vectors failed\n", nic->num_vec); return 1; + } sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); /* Register Misc interrupt */ - ret = request_irq(nic->msix_entries[irq].vector, + ret = request_irq(pci_irq_vector(nic->pdev, irq), nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic); if (ret) @@ -1076,6 +1177,13 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } + /* In XDP case, initial HW tx queues are used for XDP, + * but stack's queue mapping starts at '0', so skip the + * Tx queues attached to Rx queues for XDP. + */ + if (nic->xdp_prog) + qid += nic->xdp_tx_queues; + snic = nic; /* Get secondary Qset's SQ structure */ if (qid >= MAX_SND_QUEUES_PER_QS) { @@ -1164,7 +1272,7 @@ int nicvf_stop(struct net_device *netdev) /* Wait for pending IRQ handlers to finish */ for (irq = 0; irq < nic->num_vec; irq++) - synchronize_irq(nic->msix_entries[irq].vector); + synchronize_irq(pci_irq_vector(nic->pdev, irq)); tasklet_kill(&nic->rbdr_task); tasklet_kill(&nic->qs_err_task); @@ -1365,7 +1473,7 @@ static int nicvf_set_mac_address(struct net_device *netdev, void *p) memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - if (nic->msix_enabled) { + if (nic->pdev->msix_enabled) { if (nicvf_hw_set_mac_addr(nic, netdev)) return -EBUSY; } else { @@ -1553,6 +1661,114 @@ static int nicvf_set_features(struct net_device *netdev, return 0; } +static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached) +{ + u8 cq_count, txq_count; + + /* Set XDP Tx queue count same as Rx queue count */ + if (!bpf_attached) + nic->xdp_tx_queues = 0; + else + nic->xdp_tx_queues = nic->rx_queues; + + /* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets + * needs to be allocated, check how many. + */ + txq_count = nic->xdp_tx_queues + nic->tx_queues; + cq_count = max(nic->rx_queues, txq_count); + if (cq_count > MAX_CMP_QUEUES_PER_QS) { + nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS); + nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1; + } else { + nic->sqs_count = 0; + } + + /* Set primary Qset's resources */ + nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS); + nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS); + nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt); + + /* Update stack */ + nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues); +} + +static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) +{ + struct net_device *dev = nic->netdev; + bool if_up = netif_running(nic->netdev); + struct bpf_prog *old_prog; + bool bpf_attached = false; + + /* For now just support only the usual MTU sized frames */ + if (prog && (dev->mtu > 1500)) { + netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", + dev->mtu); + return -EOPNOTSUPP; + } + + /* ALL SQs attached to CQs i.e same as RQs, are treated as + * XDP Tx queues and more Tx queues are allocated for + * network stack to send pkts out. 
+ * + * No of Tx queues are either same as Rx queues or whatever + * is left in max no of queues possible. + */ + if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) { + netdev_warn(dev, + "Failed to attach BPF prog, RXQs + TXQs > Max %d\n", + nic->max_queues); + return -ENOMEM; + } + + if (if_up) + nicvf_stop(nic->netdev); + + old_prog = xchg(&nic->xdp_prog, prog); + /* Detach old prog, if any */ + if (old_prog) + bpf_prog_put(old_prog); + + if (nic->xdp_prog) { + /* Attach BPF program */ + nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); + if (!IS_ERR(nic->xdp_prog)) + bpf_attached = true; + } + + /* Calculate Tx queues needed for XDP and network stack */ + nicvf_set_xdp_queues(nic, bpf_attached); + + if (if_up) { + /* Reinitialize interface, clean slate */ + nicvf_open(nic->netdev); + netif_trans_update(nic->netdev); + } + + return 0; +} + +static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +{ + struct nicvf *nic = netdev_priv(netdev); + + /* To avoid checks while retrieving buffer address from CQE_RX, + * do not support XDP for T88 pass1.x silicons which are anyway + * not in use widely. + */ + if (pass1_silicon(nic->pdev)) + return -EOPNOTSUPP; + + switch (xdp->command) { + case XDP_SETUP_PROG: + return nicvf_xdp_setup(nic, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = !!nic->xdp_prog; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops nicvf_netdev_ops = { .ndo_open = nicvf_open, .ndo_stop = nicvf_stop, @@ -1563,6 +1779,7 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_tx_timeout = nicvf_tx_timeout, .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, + .ndo_xdp = nicvf_xdp, }; static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -1665,8 +1882,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_unregister_interrupts; - netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG | - NETIF_F_TSO | NETIF_F_GRO | + netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG | + NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HW_VLAN_CTAG_RX); netdev->hw_features |= NETIF_F_RXHASH; @@ -1674,7 +1892,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features |= netdev->hw_features; netdev->hw_features |= NETIF_F_LOOPBACK; - netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; + netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; netdev->netdev_ops = &nicvf_netdev_ops; netdev->watchdog_timeo = NICVF_TX_TIMEOUT; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index f13289f0d238..2b181762ad49 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -19,16 +19,8 @@ #include "q_struct.h" #include "nicvf_queues.h" -#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ? 
PAGE_ALLOC_COSTLY_ORDER : 0) - -static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) -{ - /* Translation is installed only when IOMMU is present */ - if (nic->iommu_domain) - return iommu_iova_to_phys(nic->iommu_domain, dma_addr); - return dma_addr; -} - +static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, + int size, u64 data); static void nicvf_get_page(struct nicvf *nic) { if (!nic->rb_pageref || !nic->rb_page) @@ -90,46 +82,152 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) dmem->base = NULL; } -/* Allocate buffer for packet reception - * HW returns memory address where packet is DMA'ed but not a pointer - * into RBDR ring, so save buffer address at the start of fragment and - * align the start address to a cache aligned address +#define XDP_PAGE_REFCNT_REFILL 256 + +/* Allocate a new page or recycle one if possible + * + * We cannot optimize dma mapping here, since + * 1. It's only one RBDR ring for 8 Rx queues. + * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed + * and not idx into RBDR ring, so can't refer to saved info. + * 3. There are multiple receive buffers per page */ -static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, - u32 buf_len, u64 **rbuf) +static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic, + struct rbdr *rbdr, gfp_t gfp) +{ + int ref_count; + struct page *page = NULL; + struct pgcache *pgcache, *next; + + /* Check if page is already allocated */ + pgcache = &rbdr->pgcache[rbdr->pgidx]; + page = pgcache->page; + /* Check if page can be recycled */ + if (page) { + ref_count = page_ref_count(page); + /* Check if this page has been used once i.e 'put_page' + * called after packet transmission i.e internal ref_count + * and page's ref_count are equal i.e page can be recycled. + */ + if (rbdr->is_xdp && (ref_count == pgcache->ref_count)) + pgcache->ref_count--; + else + page = NULL; + + /* In non-XDP mode, page's ref_count needs to be '1' for it + * to be recycled. + */ + if (!rbdr->is_xdp && (ref_count != 1)) + page = NULL; + } + + if (!page) { + page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0); + if (!page) + return NULL; + + this_cpu_inc(nic->pnicvf->drv_stats->page_alloc); + + /* Check for space */ + if (rbdr->pgalloc >= rbdr->pgcnt) { + /* Page can still be used */ + nic->rb_page = page; + return NULL; + } + + /* Save the page in page cache */ + pgcache->page = page; + pgcache->dma_addr = 0; + pgcache->ref_count = 0; + rbdr->pgalloc++; + } + + /* Take additional page references for recycling */ + if (rbdr->is_xdp) { + /* Since there is single RBDR (i.e single core doing + * page recycling) per 8 Rx queues, in XDP mode adjusting + * page references atomically is the biggest bottleneck, so + * take bunch of references at a time. + * + * So here, below reference counts defer by '1'. + */ + if (!pgcache->ref_count) { + pgcache->ref_count = XDP_PAGE_REFCNT_REFILL; + page_ref_add(page, XDP_PAGE_REFCNT_REFILL); + } + } else { + /* In non-XDP case, single 64K page is divided across multiple + * receive buffers, so cost of recycling is less anyway. + * So we can do with just one extra reference. 
+ */ + page_ref_add(page, 1); + } + + rbdr->pgidx++; + rbdr->pgidx &= (rbdr->pgcnt - 1); + + /* Prefetch refcount of next page in page cache */ + next = &rbdr->pgcache[rbdr->pgidx]; + page = next->page; + if (page) + prefetch(&page->_refcount); + + return pgcache; +} + +/* Allocate buffer for packet reception */ +static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, + gfp_t gfp, u32 buf_len, u64 *rbuf) { - int order = NICVF_PAGE_ORDER; + struct pgcache *pgcache = NULL; - /* Check if request can be accomodated in previous allocated page */ - if (nic->rb_page && - ((nic->rb_page_offset + buf_len) < (PAGE_SIZE << order))) { + /* Check if request can be accomodated in previous allocated page. + * But in XDP mode only one buffer per page is permitted. + */ + if (!rbdr->is_xdp && nic->rb_page && + ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) { nic->rb_pageref++; goto ret; } nicvf_get_page(nic); + nic->rb_page = NULL; - /* Allocate a new page */ - nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, - order); - if (!nic->rb_page) { + /* Get new page, either recycled or new one */ + pgcache = nicvf_alloc_page(nic, rbdr, gfp); + if (!pgcache && !nic->rb_page) { this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures); return -ENOMEM; } + nic->rb_page_offset = 0; + + /* Reserve space for header modifications by BPF program */ + if (rbdr->is_xdp) + buf_len += XDP_PACKET_HEADROOM; + + /* Check if it's recycled */ + if (pgcache) + nic->rb_page = pgcache->page; ret: - /* HW will ensure data coherency, CPU sync not required */ - *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, + if (rbdr->is_xdp && pgcache && pgcache->dma_addr) { + *rbuf = pgcache->dma_addr; + } else { + /* HW will ensure data coherency, CPU sync not required */ + *rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, nic->rb_page_offset, buf_len, DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC)); - if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { - if (!nic->rb_page_offset) - __free_pages(nic->rb_page, order); - nic->rb_page = NULL; - return -ENOMEM; + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { + if (!nic->rb_page_offset) + __free_pages(nic->rb_page, 0); + nic->rb_page = NULL; + return -ENOMEM; + } + if (pgcache) + pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM; + nic->rb_page_offset += buf_len; } - nic->rb_page_offset += buf_len; return 0; } @@ -159,7 +257,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, int ring_len, int buf_size) { int idx; - u64 *rbuf; + u64 rbuf; struct rbdr_entry_t *desc; int err; @@ -177,10 +275,34 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, rbdr->head = 0; rbdr->tail = 0; + /* Initialize page recycling stuff. + * + * Can't use single buffer per page especially with 64K pages. + * On embedded platforms i.e 81xx/83xx available memory itself + * is low and minimum ring size of RBDR is 8K, that takes away + * lots of memory. + * + * But for XDP it has to be a single buffer per page. 
+ */ + if (!nic->pnicvf->xdp_prog) { + rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size); + rbdr->is_xdp = false; + } else { + rbdr->pgcnt = ring_len; + rbdr->is_xdp = true; + } + rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt); + rbdr->pgcache = kzalloc(sizeof(*rbdr->pgcache) * + rbdr->pgcnt, GFP_KERNEL); + if (!rbdr->pgcache) + return -ENOMEM; + rbdr->pgidx = 0; + rbdr->pgalloc = 0; + nic->rb_page = NULL; for (idx = 0; idx < ring_len; idx++) { - err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN, - &rbuf); + err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL, + RCV_FRAG_LEN, &rbuf); if (err) { /* To free already allocated and mapped ones */ rbdr->tail = idx - 1; @@ -188,7 +310,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, } desc = GET_RBDR_DESC(rbdr, idx); - desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); } nicvf_get_page(nic); @@ -201,6 +323,7 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) { int head, tail; u64 buf_addr, phys_addr; + struct pgcache *pgcache; struct rbdr_entry_t *desc; if (!rbdr) @@ -216,7 +339,7 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) /* Release page references */ while (head != tail) { desc = GET_RBDR_DESC(rbdr, head); - buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + buf_addr = desc->buf_addr; phys_addr = nicvf_iova_to_phys(nic, buf_addr); dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); @@ -227,13 +350,31 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) } /* Release buffer of tail desc */ desc = GET_RBDR_DESC(rbdr, tail); - buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + buf_addr = desc->buf_addr; phys_addr = nicvf_iova_to_phys(nic, buf_addr); dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); if (phys_addr) put_page(virt_to_page(phys_to_virt(phys_addr))); + /* Sync page cache info */ + smp_rmb(); + + /* Release additional page references held for recycling */ + head = 0; + while (head < rbdr->pgcnt) { + pgcache = &rbdr->pgcache[head]; + if (pgcache->page && page_ref_count(pgcache->page) != 0) { + if (!rbdr->is_xdp) { + put_page(pgcache->page); + continue; + } + page_ref_sub(pgcache->page, pgcache->ref_count - 1); + put_page(pgcache->page); + } + head++; + } + /* Free RBDR ring */ nicvf_free_q_desc_mem(nic, &rbdr->dmem); } @@ -248,7 +389,7 @@ static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp) int refill_rb_cnt; struct rbdr *rbdr; struct rbdr_entry_t *desc; - u64 *rbuf; + u64 rbuf; int new_rb = 0; refill: @@ -269,17 +410,20 @@ refill: else refill_rb_cnt = qs->rbdr_len - qcount - 1; + /* Sync page cache info */ + smp_rmb(); + /* Start filling descs from tail */ tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3; while (refill_rb_cnt) { tail++; tail &= (rbdr->dmem.q_len - 1); - if (nicvf_alloc_rcv_buffer(nic, gfp, RCV_FRAG_LEN, &rbuf)) + if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf)) break; desc = GET_RBDR_DESC(rbdr, tail); - desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); refill_rb_cnt--; new_rb++; } @@ -362,7 +506,7 @@ static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq) /* Initialize transmit queue */ static int nicvf_init_snd_queue(struct nicvf *nic, - struct snd_queue *sq, int q_len) + struct snd_queue *sq, int q_len, int qidx) { int err; @@ -375,17 
+519,38 @@ static int nicvf_init_snd_queue(struct nicvf *nic, sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL); if (!sq->skbuff) return -ENOMEM; + sq->head = 0; sq->tail = 0; - atomic_set(&sq->free_cnt, q_len - 1); sq->thresh = SND_QUEUE_THRESH; - /* Preallocate memory for TSO segment's header */ - sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev, - q_len * TSO_HEADER_SIZE, - &sq->tso_hdrs_phys, GFP_KERNEL); - if (!sq->tso_hdrs) - return -ENOMEM; + /* Check if this SQ is a XDP TX queue */ + if (nic->sqs_mode) + qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS); + if (qidx < nic->pnicvf->xdp_tx_queues) { + /* Alloc memory to save page pointers for XDP_TX */ + sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL); + if (!sq->xdp_page) + return -ENOMEM; + sq->xdp_desc_cnt = 0; + sq->xdp_free_cnt = q_len - 1; + sq->is_xdp = true; + } else { + sq->xdp_page = NULL; + sq->xdp_desc_cnt = 0; + sq->xdp_free_cnt = 0; + sq->is_xdp = false; + + atomic_set(&sq->free_cnt, q_len - 1); + + /* Preallocate memory for TSO segment's header */ + sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev, + q_len * TSO_HEADER_SIZE, + &sq->tso_hdrs_phys, + GFP_KERNEL); + if (!sq->tso_hdrs) + return -ENOMEM; + } return 0; } @@ -411,6 +576,7 @@ void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) { struct sk_buff *skb; + struct page *page; struct sq_hdr_subdesc *hdr; struct sq_hdr_subdesc *tso_sqe; @@ -428,8 +594,15 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) smp_rmb(); while (sq->head != sq->tail) { skb = (struct sk_buff *)sq->skbuff[sq->head]; - if (!skb) + if (!skb || !sq->xdp_page) + goto next; + + page = (struct page *)sq->xdp_page[sq->head]; + if (!page) goto next; + else + put_page(page); + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); /* Check for dummy descriptor used for HW TSO offload on 88xx */ if (hdr->dont_send) { @@ -442,12 +615,14 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) nicvf_unmap_sndq_buffers(nic, sq, sq->head, hdr->subdesc_cnt); } - dev_kfree_skb_any(skb); + if (skb) + dev_kfree_skb_any(skb); next: sq->head++; sq->head &= (sq->dmem.q_len - 1); } kfree(sq->skbuff); + kfree(sq->xdp_page); nicvf_free_q_desc_mem(nic, &sq->dmem); } @@ -838,7 +1013,7 @@ static int nicvf_alloc_resources(struct nicvf *nic) /* Alloc send queue */ for (qidx = 0; qidx < qs->sq_cnt; qidx++) { - if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len)) + if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx)) goto alloc_fail; } @@ -876,6 +1051,7 @@ int nicvf_set_qset_resources(struct nicvf *nic) nic->rx_queues = qs->rq_cnt; nic->tx_queues = qs->sq_cnt; + nic->xdp_tx_queues = 0; return 0; } @@ -940,7 +1116,10 @@ static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt) int qentry; qentry = sq->tail; - atomic_sub(desc_cnt, &sq->free_cnt); + if (!sq->is_xdp) + atomic_sub(desc_cnt, &sq->free_cnt); + else + sq->xdp_free_cnt -= desc_cnt; sq->tail += desc_cnt; sq->tail &= (sq->dmem.q_len - 1); @@ -958,7 +1137,10 @@ static inline void nicvf_rollback_sq_desc(struct snd_queue *sq, /* Free descriptor back to SQ for future use */ void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt) { - atomic_add(desc_cnt, &sq->free_cnt); + if (!sq->is_xdp) + atomic_add(desc_cnt, &sq->free_cnt); + else + sq->xdp_free_cnt += desc_cnt; sq->head += desc_cnt; sq->head &= (sq->dmem.q_len - 1); } @@ -1016,6 +1198,58 @@ void nicvf_sq_free_used_descs(struct 
net_device *netdev, struct snd_queue *sq, } } +/* XDP Transmit APIs */ +void nicvf_xdp_sq_doorbell(struct nicvf *nic, + struct snd_queue *sq, int sq_num) +{ + if (!sq->xdp_desc_cnt) + return; + + /* make sure all memory stores are done before ringing doorbell */ + wmb(); + + /* Inform HW to xmit all TSO segments */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, sq->xdp_desc_cnt); + sq->xdp_desc_cnt = 0; +} + +static inline void +nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, + int subdesc_cnt, u64 data, int len) +{ + struct sq_hdr_subdesc *hdr; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); + memset(hdr, 0, SND_QUEUE_DESC_SIZE); + hdr->subdesc_type = SQ_DESC_TYPE_HEADER; + hdr->subdesc_cnt = subdesc_cnt; + hdr->tot_len = len; + hdr->post_cqe = 1; + sq->xdp_page[qentry] = (u64)virt_to_page((void *)data); +} + +int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, + u64 bufaddr, u64 dma_addr, u16 len) +{ + int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT; + int qentry; + + if (subdesc_cnt > sq->xdp_free_cnt) + return 0; + + qentry = nicvf_get_sq_desc(sq, subdesc_cnt); + + nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len); + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr); + + sq->xdp_desc_cnt += subdesc_cnt; + + return 1; +} + /* Calculate no of SQ subdescriptors needed to transmit all * segments of this TSO packet. * Taken from 'Tilera network driver' with a minor modification. @@ -1094,7 +1328,13 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, { int proto; struct sq_hdr_subdesc *hdr; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + ip.hdr = skb_network_header(skb); hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); memset(hdr, 0, SND_QUEUE_DESC_SIZE); hdr->subdesc_type = SQ_DESC_TYPE_HEADER; @@ -1119,7 +1359,9 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, hdr->l3_offset = skb_network_offset(skb); hdr->l4_offset = skb_transport_offset(skb); - proto = ip_hdr(skb)->protocol; + proto = (ip.v4->version == 4) ? ip.v4->protocol : + ip.v6->nexthdr; + switch (proto) { case IPPROTO_TCP: hdr->csum_l4 = SEND_L4_CSUM_TCP; @@ -1366,8 +1608,33 @@ static inline unsigned frag_num(unsigned i) #endif } +static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr, + u64 buf_addr, bool xdp) +{ + struct page *page = NULL; + int len = RCV_FRAG_LEN; + + if (xdp) { + page = virt_to_page(phys_to_virt(buf_addr)); + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. 
+ */ + if (page_ref_count(page) != 1) + return; + + len += XDP_PACKET_HEADROOM; + /* Receive buffers in XDP mode are mapped from page start */ + dma_addr &= PAGE_MASK; + } + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); +} + /* Returns SKB for a received packet */ -struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, + struct cqe_rx_t *cqe_rx, bool xdp) { int frag; int payload_len = 0; @@ -1402,10 +1669,9 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) if (!frag) { /* First fragment */ - dma_unmap_page_attrs(&nic->pdev->dev, - *rb_ptrs - cqe_rx->align_pad, - RCV_FRAG_LEN, DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); + nicvf_unmap_rcv_buffer(nic, + *rb_ptrs - cqe_rx->align_pad, + phys_addr, xdp); skb = nicvf_rb_ptr_to_skb(nic, phys_addr - cqe_rx->align_pad, payload_len); @@ -1415,9 +1681,7 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) skb_put(skb, payload_len); } else { /* Add fragments */ - dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs, - RCV_FRAG_LEN, DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); + nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp); page = virt_to_page(phys_to_virt(phys_addr)); offset = phys_to_virt(phys_addr) - page_address(page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, @@ -1547,9 +1811,6 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx) /* Check for errors in the receive cmp.queue entry */ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) { - if (!cqe_rx->err_level && !cqe_rx->err_opcode) - return 0; - if (netif_msg_rx_err(nic)) netdev_err(nic->netdev, "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n", @@ -1638,8 +1899,6 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx) { switch (cqe_tx->send_status) { - case CQ_TX_ERROP_GOOD: - return 0; case CQ_TX_ERROP_DESC_FAULT: this_cpu_inc(nic->drv_stats->tx_desc_fault); break; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 10cb4b84625b..57858522c33c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -10,6 +10,7 @@ #define NICVF_QUEUES_H #include <linux/netdevice.h> +#include <linux/iommu.h> #include "q_struct.h" #define MAX_QUEUE_SET 128 @@ -213,6 +214,12 @@ struct q_desc_mem { void *unalign_base; }; +struct pgcache { + struct page *page; + int ref_count; + u64 dma_addr; +}; + struct rbdr { bool enable; u32 dma_size; @@ -222,6 +229,13 @@ struct rbdr { u32 head; u32 tail; struct q_desc_mem dmem; + bool is_xdp; + + /* For page recycling */ + int pgidx; + int pgcnt; + int pgalloc; + struct pgcache *pgcache; } ____cacheline_aligned_in_smp; struct rcv_queue { @@ -258,6 +272,10 @@ struct snd_queue { u32 tail; u64 *skbuff; void *desc; + u64 *xdp_page; + u16 xdp_desc_cnt; + u16 xdp_free_cnt; + bool is_xdp; #define TSO_HEADER_SIZE 128 /* For TSO segment's header */ @@ -301,6 +319,14 @@ struct queue_set { #define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT) +static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) +{ + /* Translation is installed only when IOMMU is present */ + if (nic->iommu_domain) + return iommu_iova_to_phys(nic->iommu_domain, dma_addr); + return dma_addr; +} + void nicvf_unmap_sndq_buffers(struct nicvf 
*nic, struct snd_queue *sq, int hdr_sqe, u8 subdesc_cnt); void nicvf_config_vlan_stripping(struct nicvf *nic, @@ -318,8 +344,12 @@ void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, int qidx); int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, struct sk_buff *skb, u8 sq_num); +int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, + u64 bufaddr, u64 dma_addr, u16 len); +void nicvf_xdp_sq_doorbell(struct nicvf *nic, struct snd_queue *sq, int sq_num); -struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx); +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, + struct cqe_rx_t *cqe_rx, bool xdp); void nicvf_rbdr_task(unsigned long data); void nicvf_rbdr_work(struct work_struct *work); diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h index f36347237a54..e47205aa87ea 100644 --- a/drivers/net/ethernet/cavium/thunder/q_struct.h +++ b/drivers/net/ethernet/cavium/thunder/q_struct.h @@ -359,15 +359,7 @@ union cq_desc_t { }; struct rbdr_entry_t { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 rsvd0:15; - u64 buf_addr:42; - u64 cache_align:7; -#elif defined(__LITTLE_ENDIAN_BITFIELD) - u64 cache_align:7; - u64 buf_addr:42; - u64 rsvd0:15; -#endif + u64 buf_addr; }; /* TCP reassembly context */ |
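Illustrative sketch (not part of the patch): the queue accounting the thunder nicvf changes perform when an XDP program is attached, as in nicvf_set_xdp_queues() and nicvf_set_channels(). One XDP Tx queue is reserved per Rx queue, the completion-queue count becomes max(rx_queues, tx_queues + xdp_tx_queues), and anything beyond one Qset's worth of completion queues spills into secondary Qsets. The struct, helper names, and example values here are simplified assumptions; only the arithmetic follows the patch.

#include <stdio.h>

#define MAX_CMP_QUEUES_PER_QS 8	/* per-Qset completion queue limit, as in nicvf_queues.h */

struct qcfg {
	unsigned int rx_queues;      /* stack Rx queues */
	unsigned int tx_queues;      /* stack Tx queues */
	unsigned int xdp_tx_queues;  /* Tx queues reserved for XDP_TX */
	unsigned int sqs_count;      /* extra secondary Qsets needed */
};

static unsigned int roundup_to(unsigned int v, unsigned int m)
{
	return ((v + m - 1) / m) * m;
}

static void set_xdp_queues(struct qcfg *q, int xdp_attached)
{
	/* one XDP Tx queue per Rx queue, so XDP_TX never contends with the stack */
	q->xdp_tx_queues = xdp_attached ? q->rx_queues : 0;

	unsigned int txq_count = q->xdp_tx_queues + q->tx_queues;
	unsigned int cq_count = txq_count > q->rx_queues ? txq_count : q->rx_queues;

	/* completion queues beyond one Qset's worth require secondary Qsets */
	if (cq_count > MAX_CMP_QUEUES_PER_QS)
		q->sqs_count = roundup_to(cq_count, MAX_CMP_QUEUES_PER_QS) /
			       MAX_CMP_QUEUES_PER_QS - 1;
	else
		q->sqs_count = 0;
}

int main(void)
{
	struct qcfg q = { .rx_queues = 8, .tx_queues = 8 };

	set_xdp_queues(&q, 1);
	printf("xdp_tx=%u sqs=%u\n", q.xdp_tx_queues, q.sqs_count);

	/* a stack Tx packet on queue 'qid' is then shifted past the XDP queues,
	 * i.e. hw_qid = qid + q.xdp_tx_queues, matching the offset in nicvf_xmit()
	 */
	return 0;
}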