summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-08-05 23:56:11 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-08-05 23:56:11 -0400
commit: 32199ec3cf8db2de1709cec9339844555b55c16e (patch)
tree: b2c6d465919cd114b9bdafafbf66d44e7bb0e35e /drivers
parent: a02040d8d5d533773f98e02e1a8e56db5fa7a363 (diff)
parent: 95f1464f695055c72de6044d7c8a2a7a1e0c7ea2 (diff)
download: linux-32199ec3cf8db2de1709cec9339844555b55c16e.tar.gz
          linux-32199ec3cf8db2de1709cec9339844555b55c16e.tar.bz2
          linux-32199ec3cf8db2de1709cec9339844555b55c16e.zip
Merge tag 'ntb-4.8' of git://github.com/jonmason/ntb
Pull NTB updates from Jon Mason:
 "NTB bug fixes for the ntb_tool and ntb_perf, and improvements to the
  ntb_perf and ntb_pingpong for increased debuggability.

  Also, modification to the ntb_transport layer to increase/decrease
  the number of transport entries depending on the ring size"

* tag 'ntb-4.8' of git://github.com/jonmason/ntb:
  NTB: ntb_hw_intel: use local variable pdev
  NTB: ntb_hw_intel: show BAR size in debugfs info
  ntb_test: Add a selftest script for the NTB subsystem
  ntb_perf: clear link_is_up flag when the link goes down.
  ntb_pingpong: Add a debugfs file to get the ping count
  ntb_tool: Add link status and files to debugfs
  ntb_tool: Postpone memory window initialization for the user
  ntb_perf: Wait for link before running test
  ntb_perf: Return results by reading the run file
  ntb_perf: Improve thread handling to increase robustness
  ntb_perf: Schedule based on time not on performance
  ntb_transport: Check the number of spads the hardware supports
  ntb_tool: Add memory window debug support
  ntb_perf: Allow limiting the size of the memory windows
  NTB: allocate number transport entries depending on size of ring size
  ntb_tool: BUG: Ensure the buffer size is large enough to return all spads
  ntb_tool: Fix infinite loop bug when writing spad/peer_spad file
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/ntb/hw/intel/ntb_hw_intel.c   49
-rw-r--r--  drivers/ntb/ntb_transport.c           38
-rw-r--r--  drivers/ntb/test/ntb_perf.c          240
-rw-r--r--  drivers/ntb/test/ntb_pingpong.c       62
-rw-r--r--  drivers/ntb/test/ntb_tool.c          459
5 files changed, 749 insertions, 99 deletions
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index 40d04ef5da9e..0d5c29ae51de 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -551,13 +551,15 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *offp)
{
struct intel_ntb_dev *ndev;
+ struct pci_dev *pdev;
void __iomem *mmio;
char *buf;
size_t buf_size;
ssize_t ret, off;
- union { u64 v64; u32 v32; u16 v16; } u;
+ union { u64 v64; u32 v32; u16 v16; u8 v8; } u;
ndev = filp->private_data;
+ pdev = ndev_pdev(ndev);
mmio = ndev->self_mmio;
buf_size = min(count, 0x800ul);
@@ -632,6 +634,41 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
"Doorbell Bell -\t\t%#llx\n", u.v64);
off += scnprintf(buf + off, buf_size - off,
+ "\nNTB Window Size:\n");
+
+ pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &u.v8);
+ off += scnprintf(buf + off, buf_size - off,
+ "PBAR23SZ %hhu\n", u.v8);
+ if (!ndev->bar4_split) {
+ pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &u.v8);
+ off += scnprintf(buf + off, buf_size - off,
+ "PBAR45SZ %hhu\n", u.v8);
+ } else {
+ pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &u.v8);
+ off += scnprintf(buf + off, buf_size - off,
+ "PBAR4SZ %hhu\n", u.v8);
+ pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &u.v8);
+ off += scnprintf(buf + off, buf_size - off,
+ "PBAR5SZ %hhu\n", u.v8);
+ }
+
+ pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &u.v8);
+ off += scnprintf(buf + off, buf_size - off,
+ "SBAR23SZ %hhu\n", u.v8);
+ if (!ndev->bar4_split) {
+ pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &u.v8);
+ off += scnprintf(buf + off, buf_size - off,
+ "SBAR45SZ %hhu\n", u.v8);
+ } else {
+ pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &u.v8);
+ off += scnprintf(buf + off, buf_size - off,
+ "SBAR4SZ %hhu\n", u.v8);
+ pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &u.v8);
+ off += scnprintf(buf + off, buf_size - off,
+ "SBAR5SZ %hhu\n", u.v8);
+ }
+
+ off += scnprintf(buf + off, buf_size - off,
"\nNTB Incoming XLAT:\n");
u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 2));
@@ -669,7 +706,7 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
"LMT45 -\t\t\t%#018llx\n", u.v64);
}
- if (pdev_is_xeon(ndev->ntb.pdev)) {
+ if (pdev_is_xeon(pdev)) {
if (ntb_topo_is_b2b(ndev->ntb.topo)) {
off += scnprintf(buf + off, buf_size - off,
"\nNTB Outgoing B2B XLAT:\n");
@@ -750,22 +787,22 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
off += scnprintf(buf + off, buf_size - off,
"\nXEON NTB Hardware Errors:\n");
- if (!pci_read_config_word(ndev->ntb.pdev,
+ if (!pci_read_config_word(pdev,
XEON_DEVSTS_OFFSET, &u.v16))
off += scnprintf(buf + off, buf_size - off,
"DEVSTS -\t\t%#06x\n", u.v16);
- if (!pci_read_config_word(ndev->ntb.pdev,
+ if (!pci_read_config_word(pdev,
XEON_LINK_STATUS_OFFSET, &u.v16))
off += scnprintf(buf + off, buf_size - off,
"LNKSTS -\t\t%#06x\n", u.v16);
- if (!pci_read_config_dword(ndev->ntb.pdev,
+ if (!pci_read_config_dword(pdev,
XEON_UNCERRSTS_OFFSET, &u.v32))
off += scnprintf(buf + off, buf_size - off,
"UNCERRSTS -\t\t%#06x\n", u.v32);
- if (!pci_read_config_dword(ndev->ntb.pdev,
+ if (!pci_read_config_dword(pdev,
XEON_CORERRSTS_OFFSET, &u.v32))
off += scnprintf(buf + off, buf_size - off,
"CORERRSTS -\t\t%#06x\n", u.v32);
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 2ef9d9130864..d5c5894f252e 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -153,6 +153,7 @@ struct ntb_transport_qp {
unsigned int rx_index;
unsigned int rx_max_entry;
unsigned int rx_max_frame;
+ unsigned int rx_alloc_entry;
dma_cookie_t last_cookie;
struct tasklet_struct rxc_db_work;
@@ -480,7 +481,9 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_index - \t%u\n", qp->rx_index);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "rx_max_entry - \t%u\n\n", qp->rx_max_entry);
+ "rx_max_entry - \t%u\n", qp->rx_max_entry);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"tx_bytes - \t%llu\n", qp->tx_bytes);
@@ -597,9 +600,12 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
{
struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
struct ntb_transport_mw *mw;
+ struct ntb_dev *ndev = nt->ndev;
+ struct ntb_queue_entry *entry;
unsigned int rx_size, num_qps_mw;
unsigned int mw_num, mw_count, qp_count;
unsigned int i;
+ int node;
mw_count = nt->mw_count;
qp_count = nt->qp_count;
@@ -626,6 +632,23 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
qp->rx_max_entry = rx_size / qp->rx_max_frame;
qp->rx_index = 0;
+ /*
+ * Checking to see if we have more entries than the default.
+ * We should add additional entries if that is the case so we
+ * can be in sync with the transport frames.
+ */
+ node = dev_to_node(&ndev->dev);
+ for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
+ entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->qp = qp;
+ ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
+ &qp->rx_free_q);
+ qp->rx_alloc_entry++;
+ }
+
qp->remote_rx_info->entry = qp->rx_max_entry - 1;
/* setup the hdr offsets with 0's */
@@ -1037,6 +1060,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
int node;
int rc, i;
+ mw_count = ntb_mw_count(ndev);
+ if (ntb_spad_count(ndev) < (NUM_MWS + 1 + mw_count * 2)) {
+ dev_err(&ndev->dev, "Not enough scratch pad registers for %s",
+ NTB_TRANSPORT_NAME);
+ return -EIO;
+ }
+
if (ntb_db_is_unsafe(ndev))
dev_dbg(&ndev->dev,
"doorbell is unsafe, proceed anyway...\n");
@@ -1052,8 +1082,6 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
nt->ndev = ndev;
- mw_count = ntb_mw_count(ndev);
-
nt->mw_count = mw_count;
nt->mw_vec = kzalloc_node(mw_count * sizeof(*nt->mw_vec),
@@ -1722,8 +1750,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
&qp->rx_free_q);
}
+ qp->rx_alloc_entry = NTB_QP_DEF_NUM_ENTRIES;
- for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+ for (i = 0; i < qp->tx_max_entry; i++) {
entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node);
if (!entry)
goto err2;
@@ -1744,6 +1773,7 @@ err2:
while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
kfree(entry);
err1:
+ qp->rx_alloc_entry = 0;
while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
kfree(entry);
if (qp->tx_dma_chan)
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 8dfce9c9aad0..6a50f20bf1cd 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -58,6 +58,7 @@
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/ntb.h>
+#include <linux/mutex.h>
#define DRIVER_NAME "ntb_perf"
#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool"
@@ -83,6 +84,10 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
static struct dentry *perf_debugfs_dir;
+static unsigned long max_mw_size;
+module_param(max_mw_size, ulong, 0644);
+MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
+
static unsigned int seg_order = 19; /* 512K */
module_param(seg_order, uint, 0644);
MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
@@ -117,6 +122,10 @@ struct pthr_ctx {
int dma_prep_err;
int src_idx;
void *srcs[MAX_SRCS];
+ wait_queue_head_t *wq;
+ int status;
+ u64 copied;
+ u64 diff_us;
};
struct perf_ctx {
@@ -124,23 +133,23 @@ struct perf_ctx {
spinlock_t db_lock;
struct perf_mw mw;
bool link_is_up;
- struct work_struct link_cleanup;
struct delayed_work link_work;
+ wait_queue_head_t link_wq;
struct dentry *debugfs_node_dir;
struct dentry *debugfs_run;
struct dentry *debugfs_threads;
u8 perf_threads;
- bool run;
+ /* mutex ensures only one set of threads run at once */
+ struct mutex run_mutex;
struct pthr_ctx pthr_ctx[MAX_THREADS];
atomic_t tsync;
+ atomic_t tdone;
};
enum {
VERSION = 0,
MW_SZ_HIGH,
MW_SZ_LOW,
- SPAD_MSG,
- SPAD_ACK,
MAX_SPAD
};
@@ -148,10 +157,16 @@ static void perf_link_event(void *ctx)
{
struct perf_ctx *perf = ctx;
- if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1)
+ if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) {
schedule_delayed_work(&perf->link_work, 2*HZ);
- else
- schedule_work(&perf->link_cleanup);
+ } else {
+ dev_dbg(&perf->ntb->pdev->dev, "link down\n");
+
+ if (!perf->link_is_up)
+ cancel_delayed_work_sync(&perf->link_work);
+
+ perf->link_is_up = false;
+ }
}
static void perf_db_event(void *ctx, int vec)
@@ -271,6 +286,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
char __iomem *tmp = dst;
u64 perf, diff_us;
ktime_t kstart, kstop, kdiff;
+ unsigned long last_sleep = jiffies;
chunks = div64_u64(win_size, buf_size);
total_chunks = div64_u64(total, buf_size);
@@ -286,30 +302,40 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
} else
tmp += buf_size;
- /* Probably should schedule every 4GB to prevent soft hang. */
- if (((copied % SZ_4G) == 0) && !use_dma) {
+ /* Probably should schedule every 5s to prevent soft hang. */
+ if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
+ last_sleep = jiffies;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
+
+ if (unlikely(kthread_should_stop()))
+ break;
}
if (use_dma) {
- pr_info("%s: All DMA descriptors submitted\n", current->comm);
- while (atomic_read(&pctx->dma_sync) != 0)
+ pr_debug("%s: All DMA descriptors submitted\n", current->comm);
+ while (atomic_read(&pctx->dma_sync) != 0) {
+ if (kthread_should_stop())
+ break;
msleep(20);
+ }
}
kstop = ktime_get();
kdiff = ktime_sub(kstop, kstart);
diff_us = ktime_to_us(kdiff);
- pr_info("%s: copied %llu bytes\n", current->comm, copied);
+ pr_debug("%s: copied %llu bytes\n", current->comm, copied);
- pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
+ pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);
perf = div64_u64(copied, diff_us);
- pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
+ pr_debug("%s: MBytes/s: %llu\n", current->comm, perf);
+
+ pctx->copied = copied;
+ pctx->diff_us = diff_us;
return 0;
}
@@ -331,7 +357,7 @@ static int ntb_perf_thread(void *data)
int rc, node, i;
struct dma_chan *dma_chan = NULL;
- pr_info("kthread %s starting...\n", current->comm);
+ pr_debug("kthread %s starting...\n", current->comm);
node = dev_to_node(&pdev->dev);
@@ -389,7 +415,10 @@ static int ntb_perf_thread(void *data)
pctx->srcs[i] = NULL;
}
- return 0;
+ atomic_inc(&perf->tdone);
+ wake_up(pctx->wq);
+ rc = 0;
+ goto done;
err:
for (i = 0; i < MAX_SRCS; i++) {
@@ -402,6 +431,16 @@ err:
pctx->dma_chan = NULL;
}
+done:
+ /* Wait until we are told to stop */
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread_should_stop())
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
return rc;
}
@@ -472,6 +511,10 @@ static void perf_link_work(struct work_struct *work)
dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
size = perf->mw.phys_size;
+
+ if (max_mw_size && size > max_mw_size)
+ size = max_mw_size;
+
ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
@@ -496,6 +539,7 @@ static void perf_link_work(struct work_struct *work)
goto out1;
perf->link_is_up = true;
+ wake_up(&perf->link_wq);
return;
@@ -508,18 +552,6 @@ out:
msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
}
-static void perf_link_cleanup(struct work_struct *work)
-{
- struct perf_ctx *perf = container_of(work,
- struct perf_ctx,
- link_cleanup);
-
- dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
-
- if (!perf->link_is_up)
- cancel_delayed_work_sync(&perf->link_work);
-}
-
static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
{
struct perf_mw *mw;
@@ -544,16 +576,44 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
{
struct perf_ctx *perf = filp->private_data;
char *buf;
- ssize_t ret, out_offset;
+ ssize_t ret, out_off = 0;
+ struct pthr_ctx *pctx;
+ int i;
+ u64 rate;
if (!perf)
return 0;
- buf = kmalloc(64, GFP_KERNEL);
+ buf = kmalloc(1024, GFP_KERNEL);
if (!buf)
return -ENOMEM;
- out_offset = snprintf(buf, 64, "%d\n", perf->run);
- ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+
+ if (mutex_is_locked(&perf->run_mutex)) {
+ out_off = snprintf(buf, 64, "running\n");
+ goto read_from_buf;
+ }
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ pctx = &perf->pthr_ctx[i];
+
+ if (pctx->status == -ENODATA)
+ break;
+
+ if (pctx->status) {
+ out_off += snprintf(buf + out_off, 1024 - out_off,
+ "%d: error %d\n", i,
+ pctx->status);
+ continue;
+ }
+
+ rate = div64_u64(pctx->copied, pctx->diff_us);
+ out_off += snprintf(buf + out_off, 1024 - out_off,
+ "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
+ i, pctx->copied, pctx->diff_us, rate);
+ }
+
+read_from_buf:
+ ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
kfree(buf);
return ret;
@@ -564,80 +624,90 @@ static void threads_cleanup(struct perf_ctx *perf)
struct pthr_ctx *pctx;
int i;
- perf->run = false;
for (i = 0; i < MAX_THREADS; i++) {
pctx = &perf->pthr_ctx[i];
if (pctx->thread) {
- kthread_stop(pctx->thread);
+ pctx->status = kthread_stop(pctx->thread);
pctx->thread = NULL;
}
}
}
+static void perf_clear_thread_status(struct perf_ctx *perf)
+{
+ int i;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ perf->pthr_ctx[i].status = -ENODATA;
+}
+
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
size_t count, loff_t *offp)
{
struct perf_ctx *perf = filp->private_data;
int node, i;
+ DECLARE_WAIT_QUEUE_HEAD(wq);
- if (!perf->link_is_up)
- return 0;
+ if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
+ return -ENOLINK;
if (perf->perf_threads == 0)
- return 0;
+ return -EINVAL;
- if (atomic_read(&perf->tsync) == 0)
- perf->run = false;
+ if (!mutex_trylock(&perf->run_mutex))
+ return -EBUSY;
- if (perf->run)
- threads_cleanup(perf);
- else {
- perf->run = true;
+ perf_clear_thread_status(perf);
- if (perf->perf_threads > MAX_THREADS) {
- perf->perf_threads = MAX_THREADS;
- pr_info("Reset total threads to: %u\n", MAX_THREADS);
- }
+ if (perf->perf_threads > MAX_THREADS) {
+ perf->perf_threads = MAX_THREADS;
+ pr_info("Reset total threads to: %u\n", MAX_THREADS);
+ }
- /* no greater than 1M */
- if (seg_order > MAX_SEG_ORDER) {
- seg_order = MAX_SEG_ORDER;
- pr_info("Fix seg_order to %u\n", seg_order);
- }
+ /* no greater than 1M */
+ if (seg_order > MAX_SEG_ORDER) {
+ seg_order = MAX_SEG_ORDER;
+ pr_info("Fix seg_order to %u\n", seg_order);
+ }
- if (run_order < seg_order) {
- run_order = seg_order;
- pr_info("Fix run_order to %u\n", run_order);
- }
+ if (run_order < seg_order) {
+ run_order = seg_order;
+ pr_info("Fix run_order to %u\n", run_order);
+ }
- node = dev_to_node(&perf->ntb->pdev->dev);
- /* launch kernel thread */
- for (i = 0; i < perf->perf_threads; i++) {
- struct pthr_ctx *pctx;
-
- pctx = &perf->pthr_ctx[i];
- atomic_set(&pctx->dma_sync, 0);
- pctx->perf = perf;
- pctx->thread =
- kthread_create_on_node(ntb_perf_thread,
- (void *)pctx,
- node, "ntb_perf %d", i);
- if (IS_ERR(pctx->thread)) {
- pctx->thread = NULL;
- goto err;
- } else
- wake_up_process(pctx->thread);
-
- if (perf->run == false)
- return -ENXIO;
- }
+ node = dev_to_node(&perf->ntb->pdev->dev);
+ atomic_set(&perf->tdone, 0);
+ /* launch kernel thread */
+ for (i = 0; i < perf->perf_threads; i++) {
+ struct pthr_ctx *pctx;
+
+ pctx = &perf->pthr_ctx[i];
+ atomic_set(&pctx->dma_sync, 0);
+ pctx->perf = perf;
+ pctx->wq = &wq;
+ pctx->thread =
+ kthread_create_on_node(ntb_perf_thread,
+ (void *)pctx,
+ node, "ntb_perf %d", i);
+ if (IS_ERR(pctx->thread)) {
+ pctx->thread = NULL;
+ goto err;
+ } else {
+ wake_up_process(pctx->thread);
+ }
}
+ wait_event_interruptible(wq,
+ atomic_read(&perf->tdone) == perf->perf_threads);
+
+ threads_cleanup(perf);
+ mutex_unlock(&perf->run_mutex);
return count;
err:
threads_cleanup(perf);
+ mutex_unlock(&perf->run_mutex);
return -ENXIO;
}
@@ -688,6 +758,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
int node;
int rc = 0;
+ if (ntb_spad_count(ntb) < MAX_SPAD) {
+ dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
+ DRIVER_NAME);
+ return -EIO;
+ }
+
node = dev_to_node(&pdev->dev);
perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
@@ -699,11 +775,11 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
perf->ntb = ntb;
perf->perf_threads = 1;
atomic_set(&perf->tsync, 0);
- perf->run = false;
+ mutex_init(&perf->run_mutex);
spin_lock_init(&perf->db_lock);
perf_setup_mw(ntb, perf);
+ init_waitqueue_head(&perf->link_wq);
INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
- INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
rc = ntb_set_ctx(ntb, perf, &perf_ops);
if (rc)
@@ -717,11 +793,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
if (rc)
goto err_ctx;
+ perf_clear_thread_status(perf);
+
return 0;
err_ctx:
cancel_delayed_work_sync(&perf->link_work);
- cancel_work_sync(&perf->link_cleanup);
kfree(perf);
err_perf:
return rc;
@@ -734,8 +811,9 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
+ mutex_lock(&perf->run_mutex);
+
cancel_delayed_work_sync(&perf->link_work);
- cancel_work_sync(&perf->link_cleanup);
ntb_clear_ctx(ntb);
ntb_link_disable(ntb);
diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c
index fe1600566981..7d311799fca1 100644
--- a/drivers/ntb/test/ntb_pingpong.c
+++ b/drivers/ntb/test/ntb_pingpong.c
@@ -61,6 +61,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/debugfs.h>
#include <linux/ntb.h>
@@ -96,8 +97,13 @@ struct pp_ctx {
spinlock_t db_lock;
struct timer_list db_timer;
unsigned long db_delay;
+ struct dentry *debugfs_node_dir;
+ struct dentry *debugfs_count;
+ atomic_t count;
};
+static struct dentry *pp_debugfs_dir;
+
static void pp_ping(unsigned long ctx)
{
struct pp_ctx *pp = (void *)ctx;
@@ -171,10 +177,32 @@ static void pp_db_event(void *ctx, int vec)
dev_dbg(&pp->ntb->dev,
"Pong vec %d bits %#llx\n",
vec, db_bits);
+ atomic_inc(&pp->count);
}
spin_unlock_irqrestore(&pp->db_lock, irqflags);
}
+static int pp_debugfs_setup(struct pp_ctx *pp)
+{
+ struct pci_dev *pdev = pp->ntb->pdev;
+
+ if (!pp_debugfs_dir)
+ return -ENODEV;
+
+ pp->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+ pp_debugfs_dir);
+ if (!pp->debugfs_node_dir)
+ return -ENODEV;
+
+ pp->debugfs_count = debugfs_create_atomic_t("count", S_IRUSR | S_IWUSR,
+ pp->debugfs_node_dir,
+ &pp->count);
+ if (!pp->debugfs_count)
+ return -ENODEV;
+
+ return 0;
+}
+
static const struct ntb_ctx_ops pp_ops = {
.link_event = pp_link_event,
.db_event = pp_db_event,
@@ -210,6 +238,7 @@ static int pp_probe(struct ntb_client *client,
pp->ntb = ntb;
pp->db_bits = 0;
+ atomic_set(&pp->count, 0);
spin_lock_init(&pp->db_lock);
setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp);
pp->db_delay = msecs_to_jiffies(delay_ms);
@@ -218,6 +247,10 @@ static int pp_probe(struct ntb_client *client,
if (rc)
goto err_ctx;
+ rc = pp_debugfs_setup(pp);
+ if (rc)
+ goto err_ctx;
+
ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
ntb_link_event(ntb);
@@ -234,6 +267,8 @@ static void pp_remove(struct ntb_client *client,
{
struct pp_ctx *pp = ntb->ctx;
+ debugfs_remove_recursive(pp->debugfs_node_dir);
+
ntb_clear_ctx(ntb);
del_timer_sync(&pp->db_timer);
ntb_link_disable(ntb);
@@ -247,4 +282,29 @@ static struct ntb_client pp_client = {
.remove = pp_remove,
},
};
-module_ntb_client(pp_client);
+
+static int __init pp_init(void)
+{
+ int rc;
+
+ if (debugfs_initialized())
+ pp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+ rc = ntb_register_client(&pp_client);
+ if (rc)
+ goto err_client;
+
+ return 0;
+
+err_client:
+ debugfs_remove_recursive(pp_debugfs_dir);
+ return rc;
+}
+module_init(pp_init);
+
+static void __exit pp_exit(void)
+{
+ ntb_unregister_client(&pp_client);
+ debugfs_remove_recursive(pp_debugfs_dir);
+}
+module_exit(pp_exit);
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index 6f5dc6ca673d..61bf2ef87e0e 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c
@@ -59,6 +59,12 @@
*
* Eg: check if clearing the doorbell mask generates an interrupt.
*
+ * # Check the link status
+ * root@self# cat $DBG_DIR/link
+ *
+ * # Block until the link is up
+ * root@self# echo Y > $DBG_DIR/link_event
+ *
* # Set the doorbell mask
* root@self# echo 's 1' > $DBG_DIR/mask
*
@@ -79,6 +85,13 @@
* root@self# cat $DBG_DIR/spad
*
* Observe that spad 0 and 1 have the values set by the peer.
+ *
+ * # Check the memory window translation info
+ * cat $DBG_DIR/peer_trans0
+ *
+ * # Setup a 16k memory window buffer
+ * echo 16384 > $DBG_DIR/peer_trans0
+ *
*/
#include <linux/init.h>
@@ -89,6 +102,7 @@
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <linux/ntb.h>
@@ -105,11 +119,27 @@ MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+#define MAX_MWS 16
+
static struct dentry *tool_dbgfs;
+struct tool_mw {
+ int idx;
+ struct tool_ctx *tc;
+ resource_size_t win_size;
+ resource_size_t size;
+ u8 __iomem *local;
+ u8 *peer;
+ dma_addr_t peer_dma;
+ struct dentry *peer_dbg_file;
+};
+
struct tool_ctx {
struct ntb_dev *ntb;
struct dentry *dbgfs;
+ wait_queue_head_t link_wq;
+ int mw_count;
+ struct tool_mw mws[MAX_MWS];
};
#define SPAD_FNAME_SIZE 0x10
@@ -135,6 +165,8 @@ static void tool_link_event(void *ctx)
dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n",
up ? "up" : "down", speed, width);
+
+ wake_up(&tc->link_wq);
}
static void tool_db_event(void *ctx, int vec)
@@ -239,7 +271,14 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf,
if (!spad_read_fn)
return -EINVAL;
- buf_size = min_t(size_t, size, 0x100);
+ spad_count = ntb_spad_count(tc->ntb);
+
+ /*
+ * We multiply the number of spads by 15 to get the buffer size
+ * this is from 3 for the %d, 10 for the largest hex value
+ * (0x00000000) and 2 for the tab and line feed.
+ */
+ buf_size = min_t(size_t, size, spad_count * 15);
buf = kmalloc(buf_size, GFP_KERNEL);
if (!buf)
@@ -247,7 +286,6 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf,
pos = 0;
- spad_count = ntb_spad_count(tc->ntb);
for (i = 0; i < spad_count; ++i) {
pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n",
i, spad_read_fn(tc->ntb, i));
@@ -268,7 +306,7 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc,
{
int spad_idx;
u32 spad_val;
- char *buf;
+ char *buf, *buf_ptr;
int pos, n;
ssize_t rc;
@@ -288,14 +326,15 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc,
}
buf[size] = 0;
-
- n = sscanf(buf, "%d %i%n", &spad_idx, &spad_val, &pos);
+ buf_ptr = buf;
+ n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos);
while (n == 2) {
+ buf_ptr += pos;
rc = spad_write_fn(tc->ntb, spad_idx, spad_val);
if (rc)
break;
- n = sscanf(buf + pos, "%d %i%n", &spad_idx, &spad_val, &pos);
+ n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos);
}
if (n < 0)
@@ -442,8 +481,384 @@ static TOOL_FOPS_RDWR(tool_peer_spad_fops,
tool_peer_spad_read,
tool_peer_spad_write);
+static ssize_t tool_link_read(struct file *filep, char __user *ubuf,
+ size_t size, loff_t *offp)
+{
+ struct tool_ctx *tc = filep->private_data;
+ char buf[3];
+
+ buf[0] = ntb_link_is_up(tc->ntb, NULL, NULL) ? 'Y' : 'N';
+ buf[1] = '\n';
+ buf[2] = '\0';
+
+ return simple_read_from_buffer(ubuf, size, offp, buf, 2);
+}
+
+static ssize_t tool_link_write(struct file *filep, const char __user *ubuf,
+ size_t size, loff_t *offp)
+{
+ struct tool_ctx *tc = filep->private_data;
+ char buf[32];
+ size_t buf_size;
+ bool val;
+ int rc;
+
+ buf_size = min(size, (sizeof(buf) - 1));
+ if (copy_from_user(buf, ubuf, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+
+ rc = strtobool(buf, &val);
+ if (rc)
+ return rc;
+
+ if (val)
+ rc = ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+ else
+ rc = ntb_link_disable(tc->ntb);
+
+ if (rc)
+ return rc;
+
+ return size;
+}
+
+static TOOL_FOPS_RDWR(tool_link_fops,
+ tool_link_read,
+ tool_link_write);
+
+static ssize_t tool_link_event_write(struct file *filep,
+ const char __user *ubuf,
+ size_t size, loff_t *offp)
+{
+ struct tool_ctx *tc = filep->private_data;
+ char buf[32];
+ size_t buf_size;
+ bool val;
+ int rc;
+
+ buf_size = min(size, (sizeof(buf) - 1));
+ if (copy_from_user(buf, ubuf, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+
+ rc = strtobool(buf, &val);
+ if (rc)
+ return rc;
+
+ if (wait_event_interruptible(tc->link_wq,
+ ntb_link_is_up(tc->ntb, NULL, NULL) == val))
+ return -ERESTART;
+
+ return size;
+}
+
+static TOOL_FOPS_RDWR(tool_link_event_fops,
+ NULL,
+ tool_link_event_write);
+
+static ssize_t tool_mw_read(struct file *filep, char __user *ubuf,
+ size_t size, loff_t *offp)
+{
+ struct tool_mw *mw = filep->private_data;
+ ssize_t rc;
+ loff_t pos = *offp;
+ void *buf;
+
+ if (mw->local == NULL)
+ return -EIO;
+ if (pos < 0)
+ return -EINVAL;
+ if (pos >= mw->win_size || !size)
+ return 0;
+ if (size > mw->win_size - pos)
+ size = mw->win_size - pos;
+
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ memcpy_fromio(buf, mw->local + pos, size);
+ rc = copy_to_user(ubuf, buf, size);
+ if (rc == size) {
+ rc = -EFAULT;
+ goto err_free;
+ }
+
+ size -= rc;
+ *offp = pos + size;
+ rc = size;
+
+err_free:
+ kfree(buf);
+
+ return rc;
+}
+
+static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf,
+ size_t size, loff_t *offp)
+{
+ struct tool_mw *mw = filep->private_data;
+ ssize_t rc;
+ loff_t pos = *offp;
+ void *buf;
+
+ if (pos < 0)
+ return -EINVAL;
+ if (pos >= mw->win_size || !size)
+ return 0;
+ if (size > mw->win_size - pos)
+ size = mw->win_size - pos;
+
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ rc = copy_from_user(buf, ubuf, size);
+ if (rc == size) {
+ rc = -EFAULT;
+ goto err_free;
+ }
+
+ size -= rc;
+ *offp = pos + size;
+ rc = size;
+
+ memcpy_toio(mw->local + pos, buf, size);
+
+err_free:
+ kfree(buf);
+
+ return rc;
+}
+
+static TOOL_FOPS_RDWR(tool_mw_fops,
+ tool_mw_read,
+ tool_mw_write);
+
+static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf,
+ size_t size, loff_t *offp)
+{
+ struct tool_mw *mw = filep->private_data;
+
+ if (!mw->peer)
+ return -ENXIO;
+
+ return simple_read_from_buffer(ubuf, size, offp, mw->peer, mw->size);
+}
+
+static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf,
+ size_t size, loff_t *offp)
+{
+ struct tool_mw *mw = filep->private_data;
+
+ if (!mw->peer)
+ return -ENXIO;
+
+ return simple_write_to_buffer(mw->peer, mw->size, offp, ubuf, size);
+}
+
+static TOOL_FOPS_RDWR(tool_peer_mw_fops,
+ tool_peer_mw_read,
+ tool_peer_mw_write);
+
+static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size)
+{
+ int rc;
+ struct tool_mw *mw = &tc->mws[idx];
+ phys_addr_t base;
+ resource_size_t size, align, align_size;
+ char buf[16];
+
+ if (mw->peer)
+ return 0;
+
+ rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align,
+ &align_size);
+ if (rc)
+ return rc;
+
+ mw->size = min_t(resource_size_t, req_size, size);
+ mw->size = round_up(mw->size, align);
+ mw->size = round_up(mw->size, align_size);
+ mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size,
+ &mw->peer_dma, GFP_KERNEL);
+
+ if (!mw->peer)
+ return -ENOMEM;
+
+ rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size);
+ if (rc)
+ goto err_free_dma;
+
+ snprintf(buf, sizeof(buf), "peer_mw%d", idx);
+ mw->peer_dbg_file = debugfs_create_file(buf, S_IRUSR | S_IWUSR,
+ mw->tc->dbgfs, mw,
+ &tool_peer_mw_fops);
+
+ return 0;
+
+err_free_dma:
+ dma_free_coherent(&tc->ntb->pdev->dev, mw->size,
+ mw->peer,
+ mw->peer_dma);
+ mw->peer = NULL;
+ mw->peer_dma = 0;
+ mw->size = 0;
+
+ return rc;
+}
+
+static void tool_free_mw(struct tool_ctx *tc, int idx)
+{
+ struct tool_mw *mw = &tc->mws[idx];
+
+ if (mw->peer) {
+ ntb_mw_clear_trans(tc->ntb, idx);
+ dma_free_coherent(&tc->ntb->pdev->dev, mw->size,
+ mw->peer,
+ mw->peer_dma);
+ }
+
+ mw->peer = NULL;
+ mw->peer_dma = 0;
+
+ debugfs_remove(mw->peer_dbg_file);
+
+ mw->peer_dbg_file = NULL;
+}
+
+static ssize_t tool_peer_mw_trans_read(struct file *filep,
+ char __user *ubuf,
+ size_t size, loff_t *offp)
+{
+ struct tool_mw *mw = filep->private_data;
+
+ char *buf;
+ size_t buf_size;
+ ssize_t ret, off = 0;
+
+ phys_addr_t base;
+ resource_size_t mw_size;
+ resource_size_t align;
+ resource_size_t align_size;
+
+ buf_size = min_t(size_t, size, 512);
+
+ buf = kmalloc(buf_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ntb_mw_get_range(mw->tc->ntb, mw->idx,
+ &base, &mw_size, &align, &align_size);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Peer MW %d Information:\n", mw->idx);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Physical Address \t%pa[p]\n",
+ &base);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Window Size \t%lld\n",
+ (unsigned long long)mw_size);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Alignment \t%lld\n",
+ (unsigned long long)align);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Size Alignment \t%lld\n",
+ (unsigned long long)align_size);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Ready \t%c\n",
+ (mw->peer) ? 'Y' : 'N');
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Allocated Size \t%zd\n",
+ (mw->peer) ? (size_t)mw->size : 0);
+
+ ret = simple_read_from_buffer(ubuf, size, offp, buf, off);
+ kfree(buf);
+ return ret;
+}
+
+static ssize_t tool_peer_mw_trans_write(struct file *filep,
+ const char __user *ubuf,
+ size_t size, loff_t *offp)
+{
+ struct tool_mw *mw = filep->private_data;
+
+ char buf[32];
+ size_t buf_size;
+ unsigned long long val;
+ int rc;
+
+ buf_size = min(size, (sizeof(buf) - 1));
+ if (copy_from_user(buf, ubuf, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+
+ rc = kstrtoull(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ tool_free_mw(mw->tc, mw->idx);
+ if (val)
+ rc = tool_setup_mw(mw->tc, mw->idx, val);
+
+ if (rc)
+ return rc;
+
+ return size;
+}
+
+static TOOL_FOPS_RDWR(tool_peer_mw_trans_fops,
+ tool_peer_mw_trans_read,
+ tool_peer_mw_trans_write);
+
+static int tool_init_mw(struct tool_ctx *tc, int idx)
+{
+ struct tool_mw *mw = &tc->mws[idx];
+ phys_addr_t base;
+ int rc;
+
+ rc = ntb_mw_get_range(tc->ntb, idx, &base, &mw->win_size,
+ NULL, NULL);
+ if (rc)
+ return rc;
+
+ mw->tc = tc;
+ mw->idx = idx;
+ mw->local = ioremap_wc(base, mw->win_size);
+ if (!mw->local)
+ return -EFAULT;
+
+ return 0;
+}
+
+static void tool_free_mws(struct tool_ctx *tc)
+{
+ int i;
+
+ for (i = 0; i < tc->mw_count; i++) {
+ tool_free_mw(tc, i);
+
+ if (tc->mws[i].local)
+ iounmap(tc->mws[i].local);
+
+ tc->mws[i].local = NULL;
+ }
+}
+
static void tool_setup_dbgfs(struct tool_ctx *tc)
{
+ int i;
+
/* This modules is useless without dbgfs... */
if (!tool_dbgfs) {
tc->dbgfs = NULL;
@@ -472,12 +887,31 @@ static void tool_setup_dbgfs(struct tool_ctx *tc)
debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs,
tc, &tool_peer_spad_fops);
+
+ debugfs_create_file("link", S_IRUSR | S_IWUSR, tc->dbgfs,
+ tc, &tool_link_fops);
+
+ debugfs_create_file("link_event", S_IWUSR, tc->dbgfs,
+ tc, &tool_link_event_fops);
+
+ for (i = 0; i < tc->mw_count; i++) {
+ char buf[30];
+
+ snprintf(buf, sizeof(buf), "mw%d", i);
+ debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs,
+ &tc->mws[i], &tool_mw_fops);
+
+ snprintf(buf, sizeof(buf), "peer_trans%d", i);
+ debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs,
+ &tc->mws[i], &tool_peer_mw_trans_fops);
+ }
}
static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
{
struct tool_ctx *tc;
int rc;
+ int i;
if (ntb_db_is_unsafe(ntb))
dev_dbg(&ntb->dev, "doorbell is unsafe\n");
@@ -485,13 +919,21 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
if (ntb_spad_is_unsafe(ntb))
dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
- tc = kmalloc(sizeof(*tc), GFP_KERNEL);
+ tc = kzalloc(sizeof(*tc), GFP_KERNEL);
if (!tc) {
rc = -ENOMEM;
goto err_tc;
}
tc->ntb = ntb;
+ init_waitqueue_head(&tc->link_wq);
+
+ tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS);
+ for (i = 0; i < tc->mw_count; i++) {
+ rc = tool_init_mw(tc, i);
+ if (rc)
+ goto err_ctx;
+ }
tool_setup_dbgfs(tc);
@@ -505,6 +947,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
return 0;
err_ctx:
+ tool_free_mws(tc);
debugfs_remove_recursive(tc->dbgfs);
kfree(tc);
err_tc:
@@ -515,6 +958,8 @@ static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb)
{
struct tool_ctx *tc = ntb->ctx;
+ tool_free_mws(tc);
+
ntb_clear_ctx(ntb);
ntb_link_disable(ntb);