From 5e9fd733fa34b491e7ac41c91aa42ba0a9d8ea10 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Sat, 4 Jul 2015 14:48:33 -0400 Subject: NTB: Add list to MAINTAINERS Add the new NTB mailing list to MAINTAINERS Signed-off-by: Jon Mason --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b60e2b2369d2..754bc7332bcf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7258,6 +7258,7 @@ NTB DRIVER CORE M: Jon Mason M: Dave Jiang M: Allen Hubbe +L: linux-ntb@googlegroups.com S: Supported W: https://github.com/jonmason/ntb/wiki T: git git://github.com/jonmason/ntb.git @@ -7269,6 +7270,7 @@ F: include/linux/ntb_transport.h NTB INTEL DRIVER M: Jon Mason M: Dave Jiang +L: linux-ntb@googlegroups.com S: Supported W: https://github.com/jonmason/ntb/wiki T: git git://github.com/jonmason/ntb.git -- cgit v1.2.3 From e74bfeedad08180b968d8613dcde141ffb0720c3 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:17 -0400 Subject: NTB: Add flow control to the ntb_netdev Right now if we push the NTB really hard, we start dropping packets due to not being able to process the packets fast enough. We need to stop the upper layer from flooding us when that happens. A timer is necessary in order to restart the queue once the resource has been processed on the receive side. Due to the way NTB is set up, the resources on the tx side are tied to the processing of the rx side and there's no async way to know when the rx side has released those resources. 
Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/net/ntb_netdev.c | 77 +++++++++++++++++++++++++++++++++++++++++++ drivers/ntb/ntb_transport.c | 18 +++++++++- include/linux/ntb_transport.h | 1 + 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index d8757bf9ad75..a9acf7156855 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -61,11 +61,21 @@ MODULE_VERSION(NTB_NETDEV_VER); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); +/* Time in usecs for tx resource reaper */ +static unsigned int tx_time = 1; + +/* Number of descriptors to free before resuming tx */ +static unsigned int tx_start = 10; + +/* Number of descriptors still available before stop upper layer tx */ +static unsigned int tx_stop = 5; + struct ntb_netdev { struct list_head list; struct pci_dev *pdev; struct net_device *ndev; struct ntb_transport_qp *qp; + struct timer_list tx_timer; }; #define NTB_TX_TIMEOUT_MS 1000 @@ -136,11 +146,42 @@ enqueue_again: } } +static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev, + struct ntb_transport_qp *qp, int size) +{ + struct ntb_netdev *dev = netdev_priv(netdev); + + netif_stop_queue(netdev); + /* Make sure to see the latest value of ntb_transport_tx_free_entry() + * since the queue was last started. 
+ */ + smp_mb(); + + if (likely(ntb_transport_tx_free_entry(qp) < size)) { + mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time)); + return -EBUSY; + } + + netif_start_queue(netdev); + return 0; +} + +static int ntb_netdev_maybe_stop_tx(struct net_device *ndev, + struct ntb_transport_qp *qp, int size) +{ + if (netif_queue_stopped(ndev) || + (ntb_transport_tx_free_entry(qp) >= size)) + return 0; + + return __ntb_netdev_maybe_stop_tx(ndev, qp, size); +} + static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { struct net_device *ndev = qp_data; struct sk_buff *skb; + struct ntb_netdev *dev = netdev_priv(ndev); skb = data; if (!skb || !ndev) @@ -155,6 +196,15 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, } dev_kfree_skb(skb); + + if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) { + /* Make sure anybody stopping the queue after this sees the new + * value of ntb_transport_tx_free_entry() + */ + smp_mb(); + if (netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + } } static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, @@ -163,10 +213,15 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, struct ntb_netdev *dev = netdev_priv(ndev); int rc; + ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop); + rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len); if (rc) goto err; + /* check for next submit */ + ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop); + return NETDEV_TX_OK; err: @@ -175,6 +230,23 @@ err: return NETDEV_TX_BUSY; } +static void ntb_netdev_tx_timer(unsigned long data) +{ + struct net_device *ndev = (struct net_device *)data; + struct ntb_netdev *dev = netdev_priv(ndev); + + if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) { + mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time)); + } else { + /* Make sure anybody stopping the queue after this sees the new + * value of ntb_transport_tx_free_entry() + */ + smp_mb(); + if 
(netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + } +} + static int ntb_netdev_open(struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); @@ -197,8 +269,11 @@ static int ntb_netdev_open(struct net_device *ndev) } } + setup_timer(&dev->tx_timer, ntb_netdev_tx_timer, (unsigned long)ndev); + netif_carrier_off(ndev); ntb_transport_link_up(dev->qp); + netif_start_queue(ndev); return 0; @@ -219,6 +294,8 @@ static int ntb_netdev_close(struct net_device *ndev) while ((skb = ntb_transport_rx_remove(dev->qp, &len))) dev_kfree_skb(skb); + del_timer_sync(&dev->tx_timer); + return 0; } diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 1c6386d5f79c..0d851d684523 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -494,6 +494,12 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, "tx_index - \t%u\n", qp->tx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_max_entry - \t%u\n", qp->tx_max_entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "qp->remote_rx_info->entry - \t%u\n", + qp->remote_rx_info->entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "free tx - \t%u\n", + ntb_transport_tx_free_entry(qp)); out_offset += snprintf(buf + out_offset, out_count - out_offset, "\nQP Link %s\n", @@ -535,6 +541,7 @@ static struct ntb_queue_entry *ntb_list_rm(spinlock_t *lock, } entry = list_first_entry(list, struct ntb_queue_entry, entry); list_del(&entry->entry); + out: spin_unlock_irqrestore(lock, flags); @@ -1843,7 +1850,7 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); if (!entry) { qp->tx_err_no_buf++; - return -ENOMEM; + return -EBUSY; } entry->cb_data = cb; @@ -1969,6 +1976,15 @@ unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) } EXPORT_SYMBOL_GPL(ntb_transport_max_size); +unsigned int 
ntb_transport_tx_free_entry(struct ntb_transport_qp *qp) +{ + unsigned int head = qp->tx_index; + unsigned int tail = qp->remote_rx_info->entry; + + return tail > head ? tail - head : qp->tx_max_entry + tail - head; +} +EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry); + static void ntb_transport_doorbell_callback(void *data, int vector) { struct ntb_transport_ctx *nt = data; diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h index 2862861366a5..7243eb98a722 100644 --- a/include/linux/ntb_transport.h +++ b/include/linux/ntb_transport.h @@ -83,3 +83,4 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); void ntb_transport_link_up(struct ntb_transport_qp *qp); void ntb_transport_link_down(struct ntb_transport_qp *qp); bool ntb_transport_link_query(struct ntb_transport_qp *qp); +unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp); -- cgit v1.2.3 From 0a5d19d9f046d770776508fdde959d2a42bce9f7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:18 -0400 Subject: NTB: Add PCI Device IDs for Broadwell Xeon Adding PCI Device IDs for B2B (back to back), RP (root port, primary), and TB (transparent bridge, secondary) devices. 
Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 15 +++++++++++++++ drivers/ntb/hw/intel/ntb_hw_intel.h | 3 +++ 2 files changed, 18 insertions(+) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 87751cfd6f4f..c2bc56b67e63 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -190,14 +190,17 @@ static inline int pdev_is_xeon(struct pci_dev *pdev) case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: return 1; } return 0; @@ -1843,6 +1846,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev) case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP; break; } @@ -1857,6 +1863,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev) case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_PS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP; break; } @@ -1878,6 +1887,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev) case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + case PCI_DEVICE_ID_INTEL_NTB_SS_BDX: + case 
PCI_DEVICE_ID_INTEL_NTB_PS_BDX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX: ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14; break; } @@ -2234,14 +2246,17 @@ static const struct pci_device_id intel_ntb_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BDX)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_BDX)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_BDX)}, {0} }; MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl); diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index 7ddaf387b679..ea0612f797df 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -67,6 +67,9 @@ #define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E #define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F #define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E +#define PCI_DEVICE_ID_INTEL_NTB_B2B_BDX 0x6F0D +#define PCI_DEVICE_ID_INTEL_NTB_PS_BDX 0x6F0E +#define PCI_DEVICE_ID_INTEL_NTB_SS_BDX 0x6F0F /* Intel Xeon hardware */ -- cgit v1.2.3 From 315100004fd6d9189b033f3bf9c5eba9eb906705 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:19 -0400 Subject: NTB: Make the transport list in order of discovery The list should be added from the bottom and not the top in order to ensure the transport is provided in the same order to clients as ntb devices are discovered. 
Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 0d851d684523..29553fb7fb8e 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -297,7 +297,7 @@ static LIST_HEAD(ntb_transport_list); static int ntb_bus_init(struct ntb_transport_ctx *nt) { - list_add(&nt->entry, &ntb_transport_list); + list_add_tail(&nt->entry, &ntb_transport_list); return 0; } -- cgit v1.2.3 From d98ef99e378b0d5c42be928d6f2abe08a5f9ce53 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:20 -0400 Subject: NTB: Clean up QP stats info Make QP stats info more readable for debugging purposes. Also add an entry to indicate whether DMA is being used. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 29553fb7fb8e..777436c47679 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -452,7 +452,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset = 0; out_offset += snprintf(buf + out_offset, out_count - out_offset, - "NTB QP stats\n"); + "\nNTB QP stats:\n\n"); out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_bytes - \t%llu\n", qp->rx_bytes); out_offset += snprintf(buf + out_offset, out_count - out_offset, @@ -470,11 +470,11 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_err_ver - \t%llu\n", qp->rx_err_ver); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "rx_buff - \t%p\n", qp->rx_buff); + "rx_buff - \t0x%p\n", qp->rx_buff); out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_index - \t%u\n", 
qp->rx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "rx_max_entry - \t%u\n", qp->rx_max_entry); + "rx_max_entry - \t%u\n\n", qp->rx_max_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_bytes - \t%llu\n", qp->tx_bytes); @@ -489,21 +489,28 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "tx_mw - \t%p\n", qp->tx_mw); + "tx_mw - \t0x%p\n", qp->tx_mw); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "tx_index - \t%u\n", qp->tx_index); + "tx_index (H) - \t%u\n", qp->tx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "tx_max_entry - \t%u\n", qp->tx_max_entry); - out_offset += snprintf(buf + out_offset, out_count - out_offset, - "qp->remote_rx_info->entry - \t%u\n", + "RRI (T) - \t%u\n", qp->remote_rx_info->entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "tx_max_entry - \t%u\n", qp->tx_max_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp)); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "\nQP Link %s\n", + "\n"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Using DMA - \t%s\n", use_dma ? "Yes" : "No"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "QP Link - \t%s\n", qp->link_is_up ? 
"Up" : "Down"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "\n"); + if (out_offset > out_count) out_offset = out_count; -- cgit v1.2.3 From 905921e74864e80228e7f8cfe75315cd0a8cada8 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 13 Jul 2015 08:07:21 -0400 Subject: NTB: Remove dma_sync_wait from ntb_async_rx The dma_sync_wait can hurt the performance of workloads mixed with both large and small frames. Large frames will be copied using the dma engine. Small frames will be copied by the cpu. The dma_sync_wait prevents the cpu and dma engine from copying in parallel. In the period where the cpu is copying, the dma engine is stopped. The dma engine is not doing any useful work to copy large frames during that time, or during the additional time needed to restart the dma engine for the next large frame. This will decrease the throughput for the portion of a workload with large frames. In the period where the dma engine is copying, the cpu is held up waiting for dma to complete. The small frames processing will be delayed until the dma is complete. The RX frames are completed in-order, and the processing of small frames takes very little time, so dma_sync_wait may have an insignificant impact on the response time of frames. The more significant impact is to the system, because the delay in dma_sync_wait is implemented as busy non-blocking wait. This can prevent the delayed core from doing any useful work, even if it could be processing work for other drivers, unrelated to transport RX processing. After applying the earlier patch to fix out-of-order RX acknowledgement, the dma_sync_wait is no longer necessary. Remove it, so that cpu memcpy will proceed immediately for small frames, in parallel with ongoing dma for large frames. Do not hold up the cpu from doing work while dma is in progress. The prior fix will continue to ensure in-order completion of the RX frames to the upper layer, and in-order delivery of the RX acknowledgement. 
Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 777436c47679..f6aae0fbde48 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1233,18 +1233,18 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset) goto err; if (len < copy_bytes) - goto err_wait; + goto err; device = chan->device; pay_off = (size_t)offset & ~PAGE_MASK; buff_off = (size_t)buf & ~PAGE_MASK; if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) - goto err_wait; + goto err; unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT); if (!unmap) - goto err_wait; + goto err; unmap->len = len; unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset), @@ -1287,12 +1287,6 @@ err_set_unmap: dmaengine_unmap_put(unmap); err_get_unmap: dmaengine_unmap_put(unmap); -err_wait: - /* If the callbacks come out of order, the writing of the index to the - * last completed will be out of order. This may result in the - * receive stalling forever. - */ - dma_sync_wait(chan, qp->last_cookie); err: ntb_memcpy_rx(entry, offset); qp->rx_memcpy++; -- cgit v1.2.3 From 569410ca756cd3ebb15609cb6828a8393fb6384d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:22 -0400 Subject: NTB: Use unique DMA channels for TX and RX Allocate two DMA channels, one for TX operation and one for RX operation, instead of having one DMA channel for everything. This provides slightly better performance, and also will make error handling cleaner later on. 
Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 77 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f6aae0fbde48..6e3ee907d186 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -119,7 +119,8 @@ struct ntb_transport_qp { struct ntb_transport_ctx *transport; struct ntb_dev *ndev; void *cb_data; - struct dma_chan *dma_chan; + struct dma_chan *tx_dma_chan; + struct dma_chan *rx_dma_chan; bool client_ready; bool link_is_up; @@ -504,7 +505,11 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "\n"); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "Using DMA - \t%s\n", use_dma ? "Yes" : "No"); + "Using TX DMA - \t%s\n", + qp->tx_dma_chan ? "Yes" : "No"); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Using RX DMA - \t%s\n", + qp->rx_dma_chan ? "Yes" : "No"); out_offset += snprintf(buf + out_offset, out_count - out_offset, "QP Link - \t%s\n", qp->link_is_up ? 
"Up" : "Down"); @@ -1220,7 +1225,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset) { struct dma_async_tx_descriptor *txd; struct ntb_transport_qp *qp = entry->qp; - struct dma_chan *chan = qp->dma_chan; + struct dma_chan *chan = qp->rx_dma_chan; struct dma_device *device; size_t pay_off, buff_off, len; struct dmaengine_unmap_data *unmap; @@ -1381,8 +1386,8 @@ static void ntb_transport_rxc_db(unsigned long data) break; } - if (i && qp->dma_chan) - dma_async_issue_pending(qp->dma_chan); + if (i && qp->rx_dma_chan) + dma_async_issue_pending(qp->rx_dma_chan); if (i == qp->rx_max_entry) { /* there is more work to do */ @@ -1449,7 +1454,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, { struct ntb_payload_header __iomem *hdr; struct dma_async_tx_descriptor *txd; - struct dma_chan *chan = qp->dma_chan; + struct dma_chan *chan = qp->tx_dma_chan; struct dma_device *device; size_t dest_off, buff_off; struct dmaengine_unmap_data *unmap; @@ -1642,14 +1647,27 @@ ntb_transport_create_queue(void *data, struct device *client_dev, dma_cap_set(DMA_MEMCPY, dma_mask); if (use_dma) { - qp->dma_chan = dma_request_channel(dma_mask, ntb_dma_filter_fn, - (void *)(unsigned long)node); - if (!qp->dma_chan) - dev_info(&pdev->dev, "Unable to allocate DMA channel\n"); + qp->tx_dma_chan = + dma_request_channel(dma_mask, ntb_dma_filter_fn, + (void *)(unsigned long)node); + if (!qp->tx_dma_chan) + dev_info(&pdev->dev, "Unable to allocate TX DMA channel\n"); + + qp->rx_dma_chan = + dma_request_channel(dma_mask, ntb_dma_filter_fn, + (void *)(unsigned long)node); + if (!qp->rx_dma_chan) + dev_info(&pdev->dev, "Unable to allocate RX DMA channel\n"); } else { - qp->dma_chan = NULL; + qp->tx_dma_chan = NULL; + qp->rx_dma_chan = NULL; } - dev_dbg(&pdev->dev, "Using %s memcpy\n", qp->dma_chan ? "DMA" : "CPU"); + + dev_dbg(&pdev->dev, "Using %s memcpy for TX\n", + qp->tx_dma_chan ? "DMA" : "CPU"); + + dev_dbg(&pdev->dev, "Using %s memcpy for RX\n", + qp->rx_dma_chan ? 
"DMA" : "CPU"); for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); @@ -1684,8 +1702,10 @@ err2: err1: while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); - if (qp->dma_chan) - dma_release_channel(qp->dma_chan); + if (qp->tx_dma_chan) + dma_release_channel(qp->tx_dma_chan); + if (qp->rx_dma_chan) + dma_release_channel(qp->rx_dma_chan); nt->qp_bitmap_free |= qp_bit; err: return NULL; @@ -1709,12 +1729,27 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) pdev = qp->ndev->pdev; - if (qp->dma_chan) { - struct dma_chan *chan = qp->dma_chan; + if (qp->tx_dma_chan) { + struct dma_chan *chan = qp->tx_dma_chan; + /* Putting the dma_chan to NULL will force any new traffic to be + * processed by the CPU instead of the DAM engine + */ + qp->tx_dma_chan = NULL; + + /* Try to be nice and wait for any queued DMA engine + * transactions to process before smashing it with a rock + */ + dma_sync_wait(chan, qp->last_cookie); + dmaengine_terminate_all(chan); + dma_release_channel(chan); + } + + if (qp->rx_dma_chan) { + struct dma_chan *chan = qp->rx_dma_chan; /* Putting the dma_chan to NULL will force any new traffic to be * processed by the CPU instead of the DAM engine */ - qp->dma_chan = NULL; + qp->rx_dma_chan = NULL; /* Try to be nice and wait for any queued DMA engine * transactions to process before smashing it with a rock @@ -1962,16 +1997,20 @@ EXPORT_SYMBOL_GPL(ntb_transport_qp_num); unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) { unsigned int max; + unsigned int copy_align; if (!qp) return 0; - if (!qp->dma_chan) + if (!qp->tx_dma_chan && !qp->rx_dma_chan) return qp->tx_max_frame - sizeof(struct ntb_payload_header); + copy_align = max(qp->tx_dma_chan->device->copy_align, + qp->rx_dma_chan->device->copy_align); + /* If DMA engine usage is possible, try to find the max size for that */ max = qp->tx_max_frame - sizeof(struct ntb_payload_header); - max -= max % (1 << 
qp->dma_chan->device->copy_align); + max -= max % (1 << copy_align); return max; } -- cgit v1.2.3 From a7c23237481782fbea3c2230e362b72863e144b0 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Wed, 15 Jul 2015 04:15:28 -0400 Subject: NTB: Fix documentation for ntb_link_is_up There was a copy and paste error in the documentation for ntb_link_is_up. The long description was mistakenly copied from ntb_link_set_trans. This adds the appropriate long description for ntb_link_is_up. Reported-by: Dave Jiang Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/linux/ntb.h b/include/linux/ntb.h index b02f72bb8e32..e3d3299c6052 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -522,10 +522,9 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) * @speed: OUT - The link speed expressed as PCIe generation number. * @width: OUT - The link width expressed as the number of PCIe lanes. * - * Set the translation of a memory window. The peer may access local memory - * through the window starting at the address, up to the size. The address - * must be aligned to the alignment specified by ntb_mw_get_range(). The size - * must be aligned to the size alignment specified by ntb_mw_get_range(). + * Get the current state of the ntb link. It is recommended to query the link + * state once after every link event. It is safe to query the link state in + * the context of the link event callback. * * Return: One if the link is up, zero if the link is down, otherwise a * negative value indicating the error number. -- cgit v1.2.3 From 86663c91866ae85c219f1a80ef2c9460b7ca5cd8 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Wed, 15 Jul 2015 12:43:21 -0400 Subject: NTB: Fix documentation for ntb_peer_db_clear. The documentation should say "peer" not "local" when referring to the peer doorbell register. 
Reported-by: Dave Jiang Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ntb.h b/include/linux/ntb.h index e3d3299c6052..f798e2afba88 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -794,7 +794,7 @@ static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) } /** - * ntb_peer_db_clear() - clear bits in the local doorbell register + * ntb_peer_db_clear() - clear bits in the peer doorbell register * @ntb: NTB device context. * @db_bits: Doorbell bits to clear. * -- cgit v1.2.3 From 2aa2a77a489deda473c99a4c15074d092718912c Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 31 Aug 2015 09:30:59 -0400 Subject: NTB: Improve index handling in B2B MW workaround Check that b2b_mw_idx is in range of the number of memory windows when initializing the device. The workaround is considered to be in effect only if the device b2b_idx is exactly UINT_MAX, instead of any index past the last memory window. Only print B2B MW workaround information in debugfs if the workaround is in effect. 
Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index c2bc56b67e63..fc6af2da8df0 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -575,10 +575,13 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "Connection Topology -\t%s\n", ntb_topo_string(ndev->ntb.topo)); - off += scnprintf(buf + off, buf_size - off, - "B2B Offset -\t\t%#lx\n", ndev->b2b_off); - off += scnprintf(buf + off, buf_size - off, - "B2B MW Idx -\t\t%d\n", ndev->b2b_idx); + if (ndev->b2b_idx != UINT_MAX) { + off += scnprintf(buf + off, buf_size - off, + "B2B MW Idx -\t\t%u\n", ndev->b2b_idx); + off += scnprintf(buf + off, buf_size - off, + "B2B Offset -\t\t%#lx\n", ndev->b2b_off); + } + off += scnprintf(buf + off, buf_size - off, "BAR4 Split -\t\t%s\n", ndev->bar4_split ? 
"yes" : "no"); @@ -1487,7 +1490,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev, pdev = ndev_pdev(ndev); mmio = ndev->self_mmio; - if (ndev->b2b_idx >= ndev->mw_count) { + if (ndev->b2b_idx == UINT_MAX) { dev_dbg(ndev_dev(ndev), "not using b2b mw\n"); b2b_bar = 0; ndev->b2b_off = 0; @@ -1779,6 +1782,13 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev) else ndev->b2b_idx = b2b_mw_idx; + if (ndev->b2b_idx >= ndev->mw_count) { + dev_dbg(ndev_dev(ndev), + "b2b_mw_idx %d invalid for mw_count %u\n", + b2b_mw_idx, ndev->mw_count); + return -EINVAL; + } + dev_dbg(ndev_dev(ndev), "setting up b2b mw idx %d means %d\n", b2b_mw_idx, ndev->b2b_idx); @@ -2008,7 +2018,7 @@ static inline void ndev_init_struct(struct intel_ntb_dev *ndev, ndev->ntb.ops = &intel_ntb_ops; ndev->b2b_off = 0; - ndev->b2b_idx = INT_MAX; + ndev->b2b_idx = UINT_MAX; ndev->bar4_split = 0; -- cgit v1.2.3 From 9a07826f99034202dad589285a47132685d9538b Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 31 Aug 2015 09:31:00 -0400 Subject: NTB: Fix range check on memory window index The range check must exclude the upper bound. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index fc6af2da8df0..865a3e3cc581 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -240,7 +240,7 @@ static inline int ndev_ignore_unsafe(struct intel_ntb_dev *ndev, static int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx) { - if (idx < 0 || idx > ndev->mw_count) + if (idx < 0 || idx >= ndev->mw_count) return -EINVAL; return ndev->reg->mw_bar[idx]; } -- cgit v1.2.3