summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-07 13:33:07 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-07 13:33:07 -0800
commitab9f2faf8f40604551336e5b0a18e0910a57b92c (patch)
tree9068c73acf24452762d6e2b096df19e29436183e
parent75021d28594d9b6fb4d05bbc41f77948a0db0e02 (diff)
parentdb7489e07669073970358b6cacf6a9dd8dc9275e (diff)
downloadlinux-stable-ab9f2faf8f40604551336e5b0a18e0910a57b92c.tar.gz
linux-stable-ab9f2faf8f40604551336e5b0a18e0910a57b92c.tar.bz2
linux-stable-ab9f2faf8f40604551336e5b0a18e0910a57b92c.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford: "This is my initial round of 4.4 merge window patches. There are a few other things I wish to get in for 4.4 that aren't in this pull, as this represents what has gone through merge/build/run testing and not what is the last few items for which testing is not yet complete. - "Checksum offload support in user space" enablement - Misc cxgb4 fixes, add T6 support - Misc usnic fixes - 32 bit build warning fixes - Misc ocrdma fixes - Multicast loopback prevention extension - Extend the GID cache to store and return attributes of GIDs - Misc iSER updates - iSER clustering update - Network NameSpace support for rdma CM - Work Request cleanup series - New Memory Registration API" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (76 commits) IB/core, cma: Make __attribute_const__ declarations sparse-friendly IB/core: Remove old fast registration API IB/ipath: Remove fast registration from the code IB/hfi1: Remove fast registration from the code RDMA/nes: Remove old FRWR API IB/qib: Remove old FRWR API iw_cxgb4: Remove old FRWR API RDMA/cxgb3: Remove old FRWR API RDMA/ocrdma: Remove old FRWR API IB/mlx4: Remove old FRWR API support IB/mlx5: Remove old FRWR API support IB/srp: Dont allocate a page vector when using fast_reg IB/srp: Remove srp_finish_mapping IB/srp: Convert to new registration API IB/srp: Split srp_map_sg RDS/IW: Convert to new memory registration API svcrdma: Port to new memory registration API xprtrdma: Port to new memory registration API iser-target: Port to new memory registration API IB/iser: Port to new fast registration API ...
-rw-r--r--MAINTAINERS5
-rw-r--r--drivers/infiniband/core/addr.c20
-rw-r--r--drivers/infiniband/core/agent.c2
-rw-r--r--drivers/infiniband/core/cache.c112
-rw-r--r--drivers/infiniband/core/cm.c40
-rw-r--r--drivers/infiniband/core/cma.c173
-rw-r--r--drivers/infiniband/core/core_priv.h9
-rw-r--r--drivers/infiniband/core/device.c19
-rw-r--r--drivers/infiniband/core/mad.c42
-rw-r--r--drivers/infiniband/core/mad_priv.h2
-rw-r--r--drivers/infiniband/core/multicast.c3
-rw-r--r--drivers/infiniband/core/sa_query.c19
-rw-r--r--drivers/infiniband/core/sysfs.c2
-rw-r--r--drivers/infiniband/core/ucma.c5
-rw-r--r--drivers/infiniband/core/uverbs.h1
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c402
-rw-r--r--drivers/infiniband/core/uverbs_main.c1
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c4
-rw-r--r--drivers/infiniband/core/verbs.c295
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c39
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c43
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c331
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c10
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h25
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c63
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c73
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h5
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c17
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c91
-rw-r--r--drivers/infiniband/hw/mlx4/main.c75
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h37
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c169
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c330
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c4
-rw-r--r--drivers/infiniband/hw/mlx5/main.c3
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h54
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c187
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c227
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c84
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h6
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c170
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c22
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c60
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c19
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c184
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h7
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c41
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c46
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c38
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c20
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c20
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c29
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h19
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c9
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c22
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c6
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c20
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h25
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c51
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c370
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c20
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c269
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h8
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c262
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h11
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c35
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h41
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c22
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h85
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_msg.h48
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_resources.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c30
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h6
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c35
-rw-r--r--drivers/staging/rdma/amso1100/c2_qp.c8
-rw-r--r--drivers/staging/rdma/ehca/ehca_reqs.c53
-rw-r--r--drivers/staging/rdma/hfi1/keys.c55
-rw-r--r--drivers/staging/rdma/hfi1/mr.c33
-rw-r--r--drivers/staging/rdma/hfi1/qp.c2
-rw-r--r--drivers/staging/rdma/hfi1/rc.c24
-rw-r--r--drivers/staging/rdma/hfi1/ruc.c18
-rw-r--r--drivers/staging/rdma/hfi1/uc.c4
-rw-r--r--drivers/staging/rdma/hfi1/ud.c20
-rw-r--r--drivers/staging/rdma/hfi1/verbs.c26
-rw-r--r--drivers/staging/rdma/hfi1/verbs.h14
-rw-r--r--drivers/staging/rdma/ipath/ipath_rc.c24
-rw-r--r--drivers/staging/rdma/ipath/ipath_ruc.c16
-rw-r--r--drivers/staging/rdma/ipath/ipath_uc.c4
-rw-r--r--drivers/staging/rdma/ipath/ipath_ud.c26
-rw-r--r--drivers/staging/rdma/ipath/ipath_verbs.c17
-rw-r--r--drivers/staging/rdma/ipath/ipath_verbs.h8
-rw-r--r--include/linux/mlx4/device.h2
-rw-r--r--include/linux/mlx4/qp.h24
-rw-r--r--include/linux/sunrpc/svc_rdma.h6
-rw-r--r--include/rdma/ib_addr.h18
-rw-r--r--include/rdma/ib_cache.h40
-rw-r--r--include/rdma/ib_pack.h2
-rw-r--r--include/rdma/ib_sa.h12
-rw-r--r--include/rdma/ib_verbs.h222
-rw-r--r--include/rdma/rdma_cm.h8
-rw-r--r--include/uapi/rdma/ib_user_verbs.h26
-rw-r--r--net/9p/trans_rdma.c4
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/rds/ib.h6
-rw-r--r--net/rds/ib_cm.c2
-rw-r--r--net/rds/ib_send.c71
-rw-r--r--net/rds/iw.c2
-rw-r--r--net/rds/iw.h9
-rw-r--r--net/rds/iw_cm.c2
-rw-r--r--net/rds/iw_rdma.c129
-rw-r--r--net/rds/iw_send.c154
-rw-r--r--net/rds/rdma_transport.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c119
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c123
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c18
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c38
-rw-r--r--net/sunrpc/xprtrdma/verbs.c3
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h3
140 files changed, 3600 insertions, 3015 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f56a10a3eabc..4c5446a6a4a2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2757,9 +2757,10 @@ S: Supported
F: drivers/net/ethernet/cisco/enic/
CISCO VIC LOW LATENCY NIC DRIVER
-M: Upinder Malhi <umalhi@cisco.com>
+M: Christian Benvenuti <benve@cisco.com>
+M: Dave Goodell <dgoodell@cisco.com>
S: Supported
-F: drivers/infiniband/hw/usnic
+F: drivers/infiniband/hw/usnic/
CIRRUS LOGIC EP93XX ETHERNET DRIVER
M: Hartley Sweeten <hsweeten@visionengravers.com>
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 746cdf56bc76..34b1adad07aa 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
int ret = -EADDRNOTAVAIL;
if (dev_addr->bound_dev_if) {
- dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!dev)
return -ENODEV;
ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -138,7 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
switch (addr->sa_family) {
case AF_INET:
- dev = ip_dev_find(&init_net,
+ dev = ip_dev_find(dev_addr->net,
((struct sockaddr_in *) addr)->sin_addr.s_addr);
if (!dev)
@@ -149,12 +149,11 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
-
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (ipv6_chk_addr(&init_net,
+ for_each_netdev_rcu(dev_addr->net, dev) {
+ if (ipv6_chk_addr(dev_addr->net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -236,7 +235,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
fl4.daddr = dst_ip;
fl4.saddr = src_ip;
fl4.flowi4_oif = addr->bound_dev_if;
- rt = ip_route_output_key(&init_net, &fl4);
+ rt = ip_route_output_key(addr->net, &fl4);
if (IS_ERR(rt)) {
ret = PTR_ERR(rt);
goto out;
@@ -278,12 +277,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(&init_net, NULL, &fl6);
+ dst = ip6_route_output(addr->net, NULL, &fl6);
if ((ret = dst->error))
goto put;
if (ipv6_addr_any(&fl6.saddr)) {
- ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+ ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
&fl6.daddr, 0, &fl6.saddr);
if (ret)
goto put;
@@ -458,7 +457,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
}
int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
- u8 *dmac, u16 *vlan_id)
+ u8 *dmac, u16 *vlan_id, int if_index)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
@@ -476,6 +475,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
+ dev_addr.bound_dev_if = if_index;
+ dev_addr.net = &init_net;
ctx.addr = &dev_addr;
init_completion(&ctx.comp);
@@ -510,6 +511,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
rdma_gid2ip(&gid_addr._sockaddr, sgid);
memset(&dev_addr, 0, sizeof(dev_addr));
+ dev_addr.net = &init_net;
ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 0429040304fd..4fa524dfb6cf 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -126,7 +126,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
mad_send_wr = container_of(send_buf,
struct ib_mad_send_wr_private,
send_buf);
- mad_send_wr->send_wr.wr.ud.port_num = port_num;
+ mad_send_wr->send_wr.port_num = port_num;
}
if (ib_post_send_mad(send_buf, NULL)) {
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 87471ef37198..89bebeada38b 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -409,10 +409,10 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
mask, port, index);
}
-int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
- const union ib_gid *gid,
- u8 port, struct net_device *ndev,
- u16 *index)
+int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
+ const union ib_gid *gid,
+ u8 port, struct net_device *ndev,
+ u16 *index)
{
int local_index;
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
@@ -438,6 +438,82 @@ int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
return -ENOENT;
}
+EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+
+/**
+ * ib_find_gid_by_filter - Returns the GID table index where a specified
+ * GID value occurs
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value could be
+ * searched.
+ * @filter: The filter function is executed on any matching GID in the table.
+ * If the filter function returns true, the corresponding index is returned,
+ * otherwise, we continue searching the GID table. It's guaranteed that
+ * while filter is executed, ndev field is valid and the structure won't
+ * change. filter is executed in an atomic context. filter must not be NULL.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ *
+ * ib_cache_gid_find_by_filter() searches for the specified GID value
+ * of which the filter function returns true in the port's GID table.
+ * This function is only supported on RoCE ports.
+ *
+ */
+static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
+ const union ib_gid *gid,
+ u8 port,
+ bool (*filter)(const union ib_gid *,
+ const struct ib_gid_attr *,
+ void *),
+ void *context,
+ u16 *index)
+{
+ struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
+ unsigned int i;
+ bool found = false;
+
+ if (!ports_table)
+ return -EOPNOTSUPP;
+
+ if (port < rdma_start_port(ib_dev) ||
+ port > rdma_end_port(ib_dev) ||
+ !rdma_protocol_roce(ib_dev, port))
+ return -EPROTONOSUPPORT;
+
+ table = ports_table[port - rdma_start_port(ib_dev)];
+
+ for (i = 0; i < table->sz; i++) {
+ struct ib_gid_attr attr;
+ unsigned long flags;
+
+ read_lock_irqsave(&table->data_vec[i].lock, flags);
+ if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
+ goto next;
+
+ if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
+ goto next;
+
+ memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
+
+ if (filter(gid, &attr, context))
+ found = true;
+
+next:
+ read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+
+ if (found)
+ break;
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ if (index)
+ *index = i;
+ return 0;
+}
static struct ib_gid_table *alloc_gid_table(int sz)
{
@@ -649,24 +725,44 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
int ib_get_cached_gid(struct ib_device *device,
u8 port_num,
int index,
- union ib_gid *gid)
+ union ib_gid *gid,
+ struct ib_gid_attr *gid_attr)
{
if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
return -EINVAL;
- return __ib_cache_gid_get(device, port_num, index, gid, NULL);
+ return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
}
EXPORT_SYMBOL(ib_get_cached_gid);
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
+ struct net_device *ndev,
u8 *port_num,
u16 *index)
{
- return ib_cache_gid_find(device, gid, NULL, port_num, index);
+ return ib_cache_gid_find(device, gid, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);
+int ib_find_gid_by_filter(struct ib_device *device,
+ const union ib_gid *gid,
+ u8 port_num,
+ bool (*filter)(const union ib_gid *gid,
+ const struct ib_gid_attr *,
+ void *),
+ void *context, u16 *index)
+{
+ /* Only RoCE GID table supports filter function */
+ if (!rdma_cap_roce_gid_table(device, port_num) && filter)
+ return -EPROTONOSUPPORT;
+
+ return ib_cache_gid_find_by_filter(device, gid,
+ port_num, filter,
+ context, index);
+}
+EXPORT_SYMBOL(ib_find_gid_by_filter);
+
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
int index,
@@ -845,7 +941,7 @@ static void ib_cache_update(struct ib_device *device,
if (!use_roce_gid_table) {
for (i = 0; i < gid_cache->table_len; ++i) {
ret = ib_query_gid(device, port, i,
- gid_cache->table + i);
+ gid_cache->table + i, NULL);
if (ret) {
printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
ret, device->name, i);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4f918b929eca..0a26dd6d9b19 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -179,8 +179,6 @@ struct cm_av {
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
- u8 valid;
- u8 smac[ETH_ALEN];
};
struct cm_work {
@@ -361,17 +359,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
unsigned long flags;
int ret;
u8 p;
+ struct net_device *ndev = ib_get_ndev_from_path(path);
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- &p, NULL)) {
+ ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
}
read_unlock_irqrestore(&cm.device_lock, flags);
+ if (ndev)
+ dev_put(ndev);
+
if (!port)
return -EINVAL;
@@ -384,9 +386,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
- memcpy(av->smac, path->smac, sizeof(av->smac));
- av->valid = 1;
return 0;
}
@@ -1639,11 +1639,11 @@ static int cm_req_handler(struct cm_work *work)
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0, &work->path[0].sgid);
+ work->port->port_num, 0, &work->path[0].sgid,
+ NULL);
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
@@ -3618,32 +3618,6 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
- if (!cm_id_priv->av.valid) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return -EINVAL;
- }
- if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
- qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
- *qp_attr_mask |= IB_QP_VID;
- }
- if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
- memcpy(qp_attr->smac, cm_id_priv->av.smac,
- sizeof(qp_attr->smac));
- *qp_attr_mask |= IB_QP_SMAC;
- }
- if (cm_id_priv->alt_av.valid) {
- if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
- qp_attr->alt_vlan_id =
- cm_id_priv->alt_av.ah_attr.vlan_id;
- *qp_attr_mask |= IB_QP_ALT_VID;
- }
- if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
- memcpy(qp_attr->alt_smac,
- cm_id_priv->alt_av.smac,
- sizeof(qp_attr->alt_smac));
- *qp_attr_mask |= IB_QP_ALT_SMAC;
- }
- }
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 36b12d560e17..944cd90417bc 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -44,6 +44,8 @@
#include <linux/module.h>
#include <net/route.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -86,7 +88,7 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
-const char *rdma_event_msg(enum rdma_cm_event_type event)
+const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
@@ -110,22 +112,33 @@ static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
-static DEFINE_IDR(tcp_ps);
-static DEFINE_IDR(udp_ps);
-static DEFINE_IDR(ipoib_ps);
-static DEFINE_IDR(ib_ps);
+static int cma_pernet_id;
-static struct idr *cma_idr(enum rdma_port_space ps)
+struct cma_pernet {
+ struct idr tcp_ps;
+ struct idr udp_ps;
+ struct idr ipoib_ps;
+ struct idr ib_ps;
+};
+
+static struct cma_pernet *cma_pernet(struct net *net)
+{
+ return net_generic(net, cma_pernet_id);
+}
+
+static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
{
+ struct cma_pernet *pernet = cma_pernet(net);
+
switch (ps) {
case RDMA_PS_TCP:
- return &tcp_ps;
+ return &pernet->tcp_ps;
case RDMA_PS_UDP:
- return &udp_ps;
+ return &pernet->udp_ps;
case RDMA_PS_IPOIB:
- return &ipoib_ps;
+ return &pernet->ipoib_ps;
case RDMA_PS_IB:
- return &ib_ps;
+ return &pernet->ib_ps;
default:
return NULL;
}
@@ -145,24 +158,25 @@ struct rdma_bind_list {
unsigned short port;
};
-static int cma_ps_alloc(enum rdma_port_space ps,
+static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
- struct idr *idr = cma_idr(ps);
+ struct idr *idr = cma_pernet_idr(net, ps);
return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
}
-static struct rdma_bind_list *cma_ps_find(enum rdma_port_space ps, int snum)
+static struct rdma_bind_list *cma_ps_find(struct net *net,
+ enum rdma_port_space ps, int snum)
{
- struct idr *idr = cma_idr(ps);
+ struct idr *idr = cma_pernet_idr(net, ps);
return idr_find(idr, snum);
}
-static void cma_ps_remove(enum rdma_port_space ps, int snum)
+static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
{
- struct idr *idr = cma_idr(ps);
+ struct idr *idr = cma_pernet_idr(net, ps);
idr_remove(idr, snum);
}
@@ -427,10 +441,11 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
}
static inline int cma_validate_port(struct ib_device *device, u8 port,
- union ib_gid *gid, int dev_type)
+ union ib_gid *gid, int dev_type,
+ int bound_if_index)
{
- u8 found_port;
int ret = -ENODEV;
+ struct net_device *ndev = NULL;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
return ret;
@@ -438,9 +453,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
- ret = ib_find_cached_gid(device, gid, &found_port, NULL);
- if (port != found_port)
- return -ENODEV;
+ if (dev_type == ARPHRD_ETHER)
+ ndev = dev_get_by_index(&init_net, bound_if_index);
+
+ ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+
+ if (ndev)
+ dev_put(ndev);
return ret;
}
@@ -472,7 +491,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
&iboe_gid : &gid;
ret = cma_validate_port(cma_dev->device, port, gidp,
- dev_addr->dev_type);
+ dev_addr->dev_type,
+ dev_addr->bound_dev_if);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -490,7 +510,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
&iboe_gid : &gid;
ret = cma_validate_port(cma_dev->device, port, gidp,
- dev_addr->dev_type);
+ dev_addr->dev_type,
+ dev_addr->bound_dev_if);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -531,7 +552,9 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
continue;
- for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
+ for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
+ &gid, NULL);
+ i++) {
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
sgid = gid;
@@ -577,7 +600,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv,
return 0;
}
-struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+struct rdma_cm_id *rdma_create_id(struct net *net,
+ rdma_cm_event_handler event_handler,
void *context, enum rdma_port_space ps,
enum ib_qp_type qp_type)
{
@@ -601,6 +625,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
+ id_priv->id.route.addr.dev_addr.net = get_net(net);
return &id_priv->id;
}
@@ -718,18 +743,12 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
goto out;
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
- qp_attr.ah_attr.grh.sgid_index, &sgid);
+ qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
if (ret)
goto out;
BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
- if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
- ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
-
- if (ret)
- goto out;
- }
if (conn_param)
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -1260,7 +1279,7 @@ static bool cma_match_net_dev(const struct rdma_id_private *id_priv,
cma_protocol_roce(&id_priv->id);
return !addr->dev_addr.bound_dev_if ||
- (net_eq(dev_net(net_dev), &init_net) &&
+ (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
addr->dev_addr.bound_dev_if == net_dev->ifindex);
}
@@ -1321,7 +1340,8 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
}
}
- bind_list = cma_ps_find(rdma_ps_from_service_id(req.service_id),
+ bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
+ rdma_ps_from_service_id(req.service_id),
cma_port_from_service_id(req.service_id));
id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
if (IS_ERR(id_priv) && *net_dev) {
@@ -1392,6 +1412,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
static void cma_release_port(struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list = id_priv->bind_list;
+ struct net *net = id_priv->id.route.addr.dev_addr.net;
if (!bind_list)
return;
@@ -1399,7 +1420,7 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_lock(&lock);
hlist_del(&id_priv->node);
if (hlist_empty(&bind_list->owners)) {
- cma_ps_remove(bind_list->ps, bind_list->port);
+ cma_ps_remove(net, bind_list->ps, bind_list->port);
kfree(bind_list);
}
mutex_unlock(&lock);
@@ -1458,6 +1479,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
cma_deref_id(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
+ put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);
@@ -1588,7 +1610,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
ib_event->param.req_rcvd.primary_path->service_id;
int ret;
- id = rdma_create_id(listen_id->event_handler, listen_id->context,
+ id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id->event_handler, listen_id->context,
listen_id->ps, ib_event->param.req_rcvd.qp_type);
if (IS_ERR(id))
return NULL;
@@ -1643,9 +1666,10 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
+ struct net *net = listen_id->route.addr.dev_addr.net;
int ret;
- id = rdma_create_id(listen_id->event_handler, listen_id->context,
+ id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
listen_id->ps, IB_QPT_UD);
if (IS_ERR(id))
return NULL;
@@ -1882,7 +1906,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
return -ECONNABORTED;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = rdma_create_id(listen_id->id.event_handler,
+ new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
listen_id->id.context,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(new_cm_id)) {
@@ -2010,12 +2035,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
{
struct rdma_id_private *dev_id_priv;
struct rdma_cm_id *id;
+ struct net *net = id_priv->id.route.addr.dev_addr.net;
int ret;
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
- id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
+ id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
id_priv->id.qp_type);
if (IS_ERR(id))
return;
@@ -2294,16 +2320,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->num_paths = 1;
- if (addr->dev_addr.bound_dev_if)
+ if (addr->dev_addr.bound_dev_if) {
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+ route->path_rec->net = &init_net;
+ route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+ }
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
- memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
@@ -2426,7 +2453,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
p = 1;
port_found:
- ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
+ ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
if (ret)
goto out;
@@ -2688,7 +2715,8 @@ static int cma_alloc_port(enum rdma_port_space ps,
if (!bind_list)
return -ENOMEM;
- ret = cma_ps_alloc(ps, bind_list, snum);
+ ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
+ snum);
if (ret < 0)
goto err;
@@ -2707,13 +2735,14 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
static unsigned int last_used_port;
int low, high, remaining;
unsigned int rover;
+ struct net *net = id_priv->id.route.addr.dev_addr.net;
- inet_get_local_port_range(&init_net, &low, &high);
+ inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
rover = prandom_u32() % remaining + low;
retry:
if (last_used_port != rover &&
- !cma_ps_find(ps, (unsigned short)rover)) {
+ !cma_ps_find(net, ps, (unsigned short)rover)) {
int ret = cma_alloc_port(ps, id_priv, rover);
/*
* Remember previously used port number in order to avoid
@@ -2779,7 +2808,7 @@ static int cma_use_port(enum rdma_port_space ps,
if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
- bind_list = cma_ps_find(ps, snum);
+ bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
if (!bind_list) {
ret = cma_alloc_port(ps, id_priv, snum);
} else {
@@ -2971,8 +3000,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (addr->sa_family == AF_INET)
id_priv->afonly = 1;
#if IS_ENABLED(CONFIG_IPV6)
- else if (addr->sa_family == AF_INET6)
- id_priv->afonly = init_net.ipv6.sysctl.bindv6only;
+ else if (addr->sa_family == AF_INET6) {
+ struct net *net = id_priv->id.route.addr.dev_addr.net;
+
+ id_priv->afonly = net->ipv6.sysctl.bindv6only;
+ }
#endif
}
ret = cma_get_port(id_priv);
@@ -3777,6 +3809,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
dev_addr = &id_priv->id.route.addr.dev_addr;
if ((dev_addr->bound_dev_if == ndev->ifindex) &&
+ (net_eq(dev_net(ndev), dev_addr->net)) &&
memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->name, &id_priv->id);
@@ -3802,9 +3835,6 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
struct rdma_id_private *id_priv;
int ret = NOTIFY_DONE;
- if (dev_net(ndev) != &init_net)
- return NOTIFY_DONE;
-
if (event != NETDEV_BONDING_FAILOVER)
return NOTIFY_DONE;
@@ -3999,6 +4029,35 @@ static const struct ibnl_client_cbs cma_cb_table[] = {
.module = THIS_MODULE },
};
+static int cma_init_net(struct net *net)
+{
+ struct cma_pernet *pernet = cma_pernet(net);
+
+ idr_init(&pernet->tcp_ps);
+ idr_init(&pernet->udp_ps);
+ idr_init(&pernet->ipoib_ps);
+ idr_init(&pernet->ib_ps);
+
+ return 0;
+}
+
+static void cma_exit_net(struct net *net)
+{
+ struct cma_pernet *pernet = cma_pernet(net);
+
+ idr_destroy(&pernet->tcp_ps);
+ idr_destroy(&pernet->udp_ps);
+ idr_destroy(&pernet->ipoib_ps);
+ idr_destroy(&pernet->ib_ps);
+}
+
+static struct pernet_operations cma_pernet_operations = {
+ .init = cma_init_net,
+ .exit = cma_exit_net,
+ .id = &cma_pernet_id,
+ .size = sizeof(struct cma_pernet),
+};
+
static int __init cma_init(void)
{
int ret;
@@ -4007,6 +4066,10 @@ static int __init cma_init(void)
if (!cma_wq)
return -ENOMEM;
+ ret = register_pernet_subsys(&cma_pernet_operations);
+ if (ret)
+ goto err_wq;
+
ib_sa_register_client(&sa_client);
rdma_addr_register_client(&addr_client);
register_netdevice_notifier(&cma_nb);
@@ -4024,6 +4087,7 @@ err:
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
+err_wq:
destroy_workqueue(cma_wq);
return ret;
}
@@ -4035,11 +4099,8 @@ static void __exit cma_cleanup(void)
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
+ unregister_pernet_subsys(&cma_pernet_operations);
destroy_workqueue(cma_wq);
- idr_destroy(&tcp_ps);
- idr_destroy(&udp_ps);
- idr_destroy(&ipoib_ps);
- idr_destroy(&ib_ps);
}
module_init(cma_init);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 70bb36ebb03b..5cf6eb716f00 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -46,8 +46,8 @@ void ib_device_unregister_sysfs(struct ib_device *device);
void ib_cache_setup(void);
void ib_cache_cleanup(void);
-int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask);
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask);
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
@@ -65,11 +65,6 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
roce_netdev_callback cb,
void *cookie);
-int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
- const union ib_gid *gid,
- u8 port, struct net_device *ndev,
- u16 *index);
-
enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_SET,
IB_CACHE_GID_DEFAULT_MODE_DELETE
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 17639117afc6..179e8134d57f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -672,14 +672,20 @@ EXPORT_SYMBOL(ib_query_port);
* @port_num:Port number to query
* @index:GID table index to query
* @gid:Returned GID
+ * @attr: Returned GID attributes related to this GID index (only in RoCE).
+ * NULL means ignore.
*
* ib_query_gid() fetches the specified GID table entry.
*/
int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid)
+ u8 port_num, int index, union ib_gid *gid,
+ struct ib_gid_attr *attr)
{
if (rdma_cap_roce_gid_table(device, port_num))
- return ib_get_cached_gid(device, port_num, index, gid);
+ return ib_get_cached_gid(device, port_num, index, gid, attr);
+
+ if (attr)
+ return -EINVAL;
return device->query_gid(device, port_num, index, gid);
}
@@ -819,27 +825,28 @@ EXPORT_SYMBOL(ib_modify_port);
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
+ * @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index)
+ struct net_device *ndev, u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
if (rdma_cap_roce_gid_table(device, port)) {
- if (!ib_cache_gid_find_by_port(device, gid, port,
- NULL, index)) {
+ if (!ib_find_cached_gid_by_port(device, gid, port,
+ ndev, index)) {
*port_num = port;
return 0;
}
}
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
- ret = ib_query_gid(device, port, i, &tmp_gid);
+ ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
if (ret)
return ret;
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 4b5c72311deb..8d8af7a41a30 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -752,7 +752,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_device *device = mad_agent_priv->agent.device;
u8 port_num;
struct ib_wc mad_wc;
- struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
+ struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
u16 out_mad_pkey_index = 0;
u16 drslid;
@@ -761,7 +761,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
if (rdma_cap_ib_switch(device) &&
smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
- port_num = send_wr->wr.ud.port_num;
+ port_num = send_wr->port_num;
else
port_num = mad_agent_priv->agent.port_num;
@@ -832,9 +832,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
}
build_smp_wc(mad_agent_priv->agent.qp,
- send_wr->wr_id, drslid,
- send_wr->wr.ud.pkey_index,
- send_wr->wr.ud.port_num, &mad_wc);
+ send_wr->wr.wr_id, drslid,
+ send_wr->pkey_index,
+ send_wr->port_num, &mad_wc);
if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
@@ -894,7 +894,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local->mad_send_wr = mad_send_wr;
if (opa) {
- local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index;
+ local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
local->return_wc_byte_len = mad_size;
}
/* Reference MAD agent until send side of local completion handled */
@@ -1039,14 +1039,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
- mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
- mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
- mad_send_wr->send_wr.num_sge = 2;
- mad_send_wr->send_wr.opcode = IB_WR_SEND;
- mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
- mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
- mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
- mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
+ mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+ mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
+ mad_send_wr->send_wr.wr.num_sge = 2;
+ mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
+ mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
+ mad_send_wr->send_wr.remote_qpn = remote_qpn;
+ mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
+ mad_send_wr->send_wr.pkey_index = pkey_index;
if (rmpp_active) {
ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
@@ -1151,7 +1151,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
/* Set WR ID to find mad_send_wr upon completion */
qp_info = mad_send_wr->mad_agent_priv->qp_info;
- mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
+ mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
mad_agent = mad_send_wr->send_buf.mad_agent;
@@ -1179,7 +1179,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
- ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
+ ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
&bad_send_wr);
list = &qp_info->send_queue.list;
} else {
@@ -1244,7 +1244,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
* request associated with the completion
*/
next_send_buf = send_buf->next;
- mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
+ mad_send_wr->send_wr.ah = send_buf->ah;
if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
@@ -1877,7 +1877,7 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
((1 << lmc) - 1)));
} else {
if (ib_get_cached_gid(device, port_num,
- attr.grh.sgid_index, &sgid))
+ attr.grh.sgid_index, &sgid, NULL))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
@@ -2457,7 +2457,7 @@ retry:
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
- ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
+ ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
&bad_send_wr);
if (ret) {
dev_err(&port_priv->device->dev,
@@ -2515,7 +2515,7 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
struct ib_send_wr *bad_send_wr;
mad_send_wr->retry = 0;
- ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
+ ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
&bad_send_wr);
if (ret)
ib_mad_send_done_handler(port_priv, wc);
@@ -2713,7 +2713,7 @@ static void local_completions(struct work_struct *work)
build_smp_wc(recv_mad_agent->agent.qp,
(unsigned long) local->mad_send_wr,
be16_to_cpu(IB_LID_PERMISSIVE),
- local->mad_send_wr->send_wr.wr.ud.pkey_index,
+ local->mad_send_wr->send_wr.pkey_index,
recv_mad_agent->agent.port_num, &wc);
local->mad_priv->header.recv_wc.wc = &wc;
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 4a4f7aad0978..990698a6ab4b 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -123,7 +123,7 @@ struct ib_mad_send_wr_private {
struct ib_mad_send_buf send_buf;
u64 header_mapping;
u64 payload_mapping;
- struct ib_send_wr send_wr;
+ struct ib_ud_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
unsigned long timeout;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d38d8b2b2979..bb6685fb08c6 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -729,7 +729,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
u16 gid_index;
u8 p;
- ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+ ret = ib_find_cached_gid(device, &rec->port_gid,
+ NULL, &p, &gid_index);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8c014b33d8e0..dcdaa79e3f0f 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1007,26 +1007,29 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
force_grh = rdma_cap_eth_ah(device, port_num);
if (rec->hop_limit > 1 || force_grh) {
+ struct net_device *ndev = ib_get_ndev_from_path(rec);
+
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
- ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
+ ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
&gid_index);
- if (ret)
+ if (ret) {
+ if (ndev)
+ dev_put(ndev);
return ret;
+ }
ah_attr->grh.sgid_index = gid_index;
ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
+ if (ndev)
+ dev_put(ndev);
}
if (force_grh) {
memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
- ah_attr->vlan_id = rec->vlan_id;
- } else {
- ah_attr->vlan_id = 0xffff;
}
-
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1150,9 +1153,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
mad->data, &rec);
- rec.vlan_id = 0xffff;
+ rec.net = NULL;
+ rec.ifindex = 0;
memset(rec.dmac, 0, ETH_ALEN);
- memset(rec.smac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 34cdd74b0a17..b1f37d4095fa 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -289,7 +289,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
union ib_gid gid;
ssize_t ret;
- ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
+ ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 30467d10df91..8b5a934e1133 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -42,6 +42,7 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
+#include <linux/nsproxy.h>
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
@@ -472,7 +473,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
return -ENOMEM;
ctx->uid = cmd.uid;
- ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type);
+ ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
+ ucma_event_handler, ctx, cmd.ps, qp_type);
if (IS_ERR(ctx->cm_id)) {
ret = PTR_ERR(ctx->cm_id);
goto err1;
@@ -1211,7 +1213,6 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
return -EINVAL;
memset(&sa_path, 0, sizeof(sa_path));
- sa_path.vlan_id = 0xffff;
ib_sa_unpack_path(path_data->path_rec, &sa_path);
ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3863d33c243d..94bbd8c155fc 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -272,5 +272,6 @@ IB_UVERBS_DECLARE_EX_CMD(create_flow);
IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
IB_UVERBS_DECLARE_EX_CMD(query_device);
IB_UVERBS_DECLARE_EX_CMD(create_cq);
+IB_UVERBS_DECLARE_EX_CMD(create_qp);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index be4cb9f04be3..94816aeb95a0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1478,7 +1478,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- INIT_UDATA(&ucore, buf, cmd.response, sizeof(cmd), sizeof(resp));
+ INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
INIT_UDATA(&uhw, buf + sizeof(cmd),
(unsigned long)cmd.response + sizeof(resp),
@@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
return in_len;
}
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
-{
- struct ib_uverbs_create_qp cmd;
- struct ib_uverbs_create_qp_resp resp;
- struct ib_udata udata;
- struct ib_uqp_object *obj;
- struct ib_device *device;
- struct ib_pd *pd = NULL;
- struct ib_xrcd *xrcd = NULL;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
- struct ib_cq *scq = NULL, *rcq = NULL;
- struct ib_srq *srq = NULL;
- struct ib_qp *qp;
- struct ib_qp_init_attr attr;
- int ret;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+static int create_qp(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw,
+ struct ib_uverbs_ex_create_qp *cmd,
+ size_t cmd_sz,
+ int (*cb)(struct ib_uverbs_file *file,
+ struct ib_uverbs_ex_create_qp_resp *resp,
+ struct ib_udata *udata),
+ void *context)
+{
+ struct ib_uqp_object *obj;
+ struct ib_device *device;
+ struct ib_pd *pd = NULL;
+ struct ib_xrcd *xrcd = NULL;
+ struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_cq *scq = NULL, *rcq = NULL;
+ struct ib_srq *srq = NULL;
+ struct ib_qp *qp;
+ char *buf;
+ struct ib_qp_init_attr attr;
+ struct ib_uverbs_ex_create_qp_resp resp;
+ int ret;
- if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
+ if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
-
obj = kzalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
+ init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
+ &qp_lock_class);
down_write(&obj->uevent.uobject.mutex);
- if (cmd.qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ if (cmd->qp_type == IB_QPT_XRC_TGT) {
+ xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
+ &xrcd_uobj);
if (!xrcd) {
ret = -EINVAL;
goto err_put;
}
device = xrcd->device;
} else {
- if (cmd.qp_type == IB_QPT_XRC_INI) {
- cmd.max_recv_wr = cmd.max_recv_sge = 0;
+ if (cmd->qp_type == IB_QPT_XRC_INI) {
+ cmd->max_recv_wr = 0;
+ cmd->max_recv_sge = 0;
} else {
- if (cmd.is_srq) {
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ if (cmd->is_srq) {
+ srq = idr_read_srq(cmd->srq_handle,
+ file->ucontext);
if (!srq || srq->srq_type != IB_SRQT_BASIC) {
ret = -EINVAL;
goto err_put;
}
}
- if (cmd.recv_cq_handle != cmd.send_cq_handle) {
- rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
+ if (cmd->recv_cq_handle != cmd->send_cq_handle) {
+ rcq = idr_read_cq(cmd->recv_cq_handle,
+ file->ucontext, 0);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1808,9 +1807,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
}
}
- scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
+ scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = idr_read_pd(cmd->pd_handle, file->ucontext);
if (!pd || !scq) {
ret = -EINVAL;
goto err_put;
@@ -1825,31 +1824,49 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
attr.recv_cq = rcq;
attr.srq = srq;
attr.xrcd = xrcd;
- attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
- attr.qp_type = cmd.qp_type;
+ attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
+ IB_SIGNAL_REQ_WR;
+ attr.qp_type = cmd->qp_type;
attr.create_flags = 0;
- attr.cap.max_send_wr = cmd.max_send_wr;
- attr.cap.max_recv_wr = cmd.max_recv_wr;
- attr.cap.max_send_sge = cmd.max_send_sge;
- attr.cap.max_recv_sge = cmd.max_recv_sge;
- attr.cap.max_inline_data = cmd.max_inline_data;
+ attr.cap.max_send_wr = cmd->max_send_wr;
+ attr.cap.max_recv_wr = cmd->max_recv_wr;
+ attr.cap.max_send_sge = cmd->max_send_sge;
+ attr.cap.max_recv_sge = cmd->max_recv_sge;
+ attr.cap.max_inline_data = cmd->max_inline_data;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- if (cmd.qp_type == IB_QPT_XRC_TGT)
+ if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
+ sizeof(cmd->create_flags))
+ attr.create_flags = cmd->create_flags;
+
+ if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ buf = (void *)cmd + sizeof(*cmd);
+ if (cmd_sz > sizeof(*cmd))
+ if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
+ cmd_sz - sizeof(*cmd) - 1))) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = device->create_qp(pd, &attr, &udata);
+ qp = device->create_qp(pd, &attr, uhw);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
}
- if (cmd.qp_type != IB_QPT_XRC_TGT) {
+ if (cmd->qp_type != IB_QPT_XRC_TGT) {
qp->real_qp = qp;
qp->device = device;
qp->pd = pd;
@@ -1875,19 +1892,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
goto err_destroy;
memset(&resp, 0, sizeof resp);
- resp.qpn = qp->qp_num;
- resp.qp_handle = obj->uevent.uobject.id;
- resp.max_recv_sge = attr.cap.max_recv_sge;
- resp.max_send_sge = attr.cap.max_send_sge;
- resp.max_recv_wr = attr.cap.max_recv_wr;
- resp.max_send_wr = attr.cap.max_send_wr;
- resp.max_inline_data = attr.cap.max_inline_data;
+ resp.base.qpn = qp->qp_num;
+ resp.base.qp_handle = obj->uevent.uobject.id;
+ resp.base.max_recv_sge = attr.cap.max_recv_sge;
+ resp.base.max_send_sge = attr.cap.max_send_sge;
+ resp.base.max_recv_wr = attr.cap.max_recv_wr;
+ resp.base.max_send_wr = attr.cap.max_send_wr;
+ resp.base.max_inline_data = attr.cap.max_inline_data;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_copy;
- }
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
+
+ ret = cb(file, &resp, ucore);
+ if (ret)
+ goto err_cb;
if (xrcd) {
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
@@ -1913,9 +1931,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
up_write(&obj->uevent.uobject.mutex);
- return in_len;
-
-err_copy:
+ return 0;
+err_cb:
idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
err_destroy:
@@ -1937,6 +1954,113 @@ err_put:
return ret;
}
+static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
+ struct ib_uverbs_ex_create_qp_resp *resp,
+ struct ib_udata *ucore)
+{
+ if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
+ return -EFAULT;
+
+ return 0;
+}
+
+ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_qp cmd;
+ struct ib_uverbs_ex_create_qp cmd_ex;
+ struct ib_udata ucore;
+ struct ib_udata uhw;
+ ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
+ int err;
+
+ if (out_len < resp_size)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
+ resp_size);
+ INIT_UDATA(&uhw, buf + sizeof(cmd),
+ (unsigned long)cmd.response + resp_size,
+ in_len - sizeof(cmd), out_len - resp_size);
+
+ memset(&cmd_ex, 0, sizeof(cmd_ex));
+ cmd_ex.user_handle = cmd.user_handle;
+ cmd_ex.pd_handle = cmd.pd_handle;
+ cmd_ex.send_cq_handle = cmd.send_cq_handle;
+ cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
+ cmd_ex.srq_handle = cmd.srq_handle;
+ cmd_ex.max_send_wr = cmd.max_send_wr;
+ cmd_ex.max_recv_wr = cmd.max_recv_wr;
+ cmd_ex.max_send_sge = cmd.max_send_sge;
+ cmd_ex.max_recv_sge = cmd.max_recv_sge;
+ cmd_ex.max_inline_data = cmd.max_inline_data;
+ cmd_ex.sq_sig_all = cmd.sq_sig_all;
+ cmd_ex.qp_type = cmd.qp_type;
+ cmd_ex.is_srq = cmd.is_srq;
+
+ err = create_qp(file, &ucore, &uhw, &cmd_ex,
+ offsetof(typeof(cmd_ex), is_srq) +
+ sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
+ NULL);
+
+ if (err)
+ return err;
+
+ return in_len;
+}
+
+static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
+ struct ib_uverbs_ex_create_qp_resp *resp,
+ struct ib_udata *ucore)
+{
+ if (ib_copy_to_udata(ucore, resp, resp->response_length))
+ return -EFAULT;
+
+ return 0;
+}
+
+int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_create_qp_resp resp;
+ struct ib_uverbs_ex_create_qp cmd = {0};
+ int err;
+
+ if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
+ sizeof(cmd.comp_mask)))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (err)
+ return err;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if (cmd.reserved)
+ return -EINVAL;
+
+ if (ucore->outlen < (offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length)))
+ return -ENOSPC;
+
+ err = create_qp(file, ucore, uhw, &cmd,
+ min(ucore->inlen, sizeof(cmd)),
+ ib_uverbs_ex_create_qp_cb, NULL);
+
+ if (err)
+ return err;
+
+ return 0;
+}
+
ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len, int out_len)
@@ -2221,7 +2345,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
if (qp->real_qp == qp) {
- ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask);
+ ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
if (ret)
goto release_qp;
ret = qp->device->modify_qp(qp, attr,
@@ -2303,6 +2427,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
return in_len;
}
+static void *alloc_wr(size_t wr_size, __u32 num_sge)
+{
+ return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
+ num_sge * sizeof (struct ib_sge), GFP_KERNEL);
+};
+
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
@@ -2351,14 +2481,83 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
- user_wr->num_sge * sizeof (struct ib_sge),
- GFP_KERNEL);
- if (!next) {
- ret = -ENOMEM;
+ if (is_ud) {
+ struct ib_ud_wr *ud;
+
+ if (user_wr->opcode != IB_WR_SEND &&
+ user_wr->opcode != IB_WR_SEND_WITH_IMM) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ ud = alloc_wr(sizeof(*ud), user_wr->num_sge);
+ if (!ud) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
+ if (!ud->ah) {
+ kfree(ud);
+ ret = -EINVAL;
+ goto out_put;
+ }
+ ud->remote_qpn = user_wr->wr.ud.remote_qpn;
+ ud->remote_qkey = user_wr->wr.ud.remote_qkey;
+
+ next = &ud->wr;
+ } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ user_wr->opcode == IB_WR_RDMA_WRITE ||
+ user_wr->opcode == IB_WR_RDMA_READ) {
+ struct ib_rdma_wr *rdma;
+
+ rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge);
+ if (!rdma) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ rdma->remote_addr = user_wr->wr.rdma.remote_addr;
+ rdma->rkey = user_wr->wr.rdma.rkey;
+
+ next = &rdma->wr;
+ } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ struct ib_atomic_wr *atomic;
+
+ atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge);
+ if (!atomic) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ atomic->remote_addr = user_wr->wr.atomic.remote_addr;
+ atomic->compare_add = user_wr->wr.atomic.compare_add;
+ atomic->swap = user_wr->wr.atomic.swap;
+ atomic->rkey = user_wr->wr.atomic.rkey;
+
+ next = &atomic->wr;
+ } else if (user_wr->opcode == IB_WR_SEND ||
+ user_wr->opcode == IB_WR_SEND_WITH_IMM ||
+ user_wr->opcode == IB_WR_SEND_WITH_INV) {
+ next = alloc_wr(sizeof(*next), user_wr->num_sge);
+ if (!next) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+ } else {
+ ret = -EINVAL;
goto out_put;
}
+ if (user_wr->opcode == IB_WR_SEND_WITH_IMM ||
+ user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
+ } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) {
+ next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey;
+ }
+
if (!last)
wr = next;
else
@@ -2371,60 +2570,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
next->opcode = user_wr->opcode;
next->send_flags = user_wr->send_flags;
- if (is_ud) {
- if (next->opcode != IB_WR_SEND &&
- next->opcode != IB_WR_SEND_WITH_IMM) {
- ret = -EINVAL;
- goto out_put;
- }
-
- next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
- file->ucontext);
- if (!next->wr.ud.ah) {
- ret = -EINVAL;
- goto out_put;
- }
- next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
- next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
- if (next->opcode == IB_WR_SEND_WITH_IMM)
- next->ex.imm_data =
- (__be32 __force) user_wr->ex.imm_data;
- } else {
- switch (next->opcode) {
- case IB_WR_RDMA_WRITE_WITH_IMM:
- next->ex.imm_data =
- (__be32 __force) user_wr->ex.imm_data;
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_READ:
- next->wr.rdma.remote_addr =
- user_wr->wr.rdma.remote_addr;
- next->wr.rdma.rkey =
- user_wr->wr.rdma.rkey;
- break;
- case IB_WR_SEND_WITH_IMM:
- next->ex.imm_data =
- (__be32 __force) user_wr->ex.imm_data;
- break;
- case IB_WR_SEND_WITH_INV:
- next->ex.invalidate_rkey =
- user_wr->ex.invalidate_rkey;
- break;
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- next->wr.atomic.remote_addr =
- user_wr->wr.atomic.remote_addr;
- next->wr.atomic.compare_add =
- user_wr->wr.atomic.compare_add;
- next->wr.atomic.swap = user_wr->wr.atomic.swap;
- next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
- case IB_WR_SEND:
- break;
- default:
- ret = -EINVAL;
- goto out_put;
- }
- }
-
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(sizeof *next, sizeof (struct ib_sge));
@@ -2458,8 +2603,8 @@ out_put:
put_qp_read(qp);
while (wr) {
- if (is_ud && wr->wr.ud.ah)
- put_ah_read(wr->wr.ud.ah);
+ if (is_ud && ud_wr(wr)->ah)
+ put_ah_read(ud_wr(wr)->ah);
next = wr->next;
kfree(wr);
wr = next;
@@ -2698,7 +2843,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
attr.grh.sgid_index = cmd.attr.grh.sgid_index;
attr.grh.hop_limit = cmd.attr.grh.hop_limit;
attr.grh.traffic_class = cmd.attr.grh.traffic_class;
- attr.vlan_id = 0;
memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index c29a660c72fe..e3ef28861be6 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -127,6 +127,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
[IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
[IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
+ [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
};
static void ib_uverbs_add_one(struct ib_device *device);
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index abd97247443e..7d2f14c9bbef 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -141,8 +141,8 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
- memset(dst->smac, 0, sizeof(dst->smac));
memset(dst->dmac, 0, sizeof(dst->dmac));
- dst->vlan_id = 0xffff;
+ dst->net = NULL;
+ dst->ifindex = 0;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e1f2c9887f3f..043a60ee6836 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -41,6 +41,9 @@
#include <linux/export.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <net/addrconf.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -70,7 +73,7 @@ static const char * const ib_events[] = {
[IB_EVENT_GID_CHANGE] = "GID changed",
};
-const char *ib_event_msg(enum ib_event_type event)
+const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
{
size_t index = event;
@@ -104,7 +107,7 @@ static const char * const wc_statuses[] = {
[IB_WC_GENERAL_ERR] = "general error",
};
-const char *ib_wc_status_msg(enum ib_wc_status status)
+const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
{
size_t index = status;
@@ -308,6 +311,35 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
EXPORT_SYMBOL(ib_create_ah);
+struct find_gid_index_context {
+ u16 vlan_id;
+};
+
+static bool find_gid_index(const union ib_gid *gid,
+ const struct ib_gid_attr *gid_attr,
+ void *context)
+{
+ struct find_gid_index_context *ctx =
+ (struct find_gid_index_context *)context;
+
+ if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
+ (is_vlan_dev(gid_attr->ndev) &&
+ vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+ return false;
+
+ return true;
+}
+
+static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
+ u16 vlan_id, const union ib_gid *sgid,
+ u16 *gid_index)
+{
+ struct find_gid_index_context context = {.vlan_id = vlan_id};
+
+ return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+ &context, gid_index);
+}
+
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct ib_ah_attr *ah_attr)
@@ -318,21 +350,30 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
memset(ah_attr, 0, sizeof *ah_attr);
if (rdma_cap_eth_ah(device, port_num)) {
+ u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
+ wc->vlan_id : 0xffff;
+
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (wc->wc_flags & IB_WC_WITH_SMAC &&
- wc->wc_flags & IB_WC_WITH_VLAN) {
- memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
- ah_attr->vlan_id = wc->vlan_id;
- } else {
+ if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
+ !(wc->wc_flags & IB_WC_WITH_VLAN)) {
ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
- ah_attr->dmac, &ah_attr->vlan_id);
+ ah_attr->dmac,
+ wc->wc_flags & IB_WC_WITH_VLAN ?
+ NULL : &vlan_id,
+ 0);
if (ret)
return ret;
}
- } else {
- ah_attr->vlan_id = 0xffff;
+
+ ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+ &grh->dgid, &gid_index);
+ if (ret)
+ return ret;
+
+ if (wc->wc_flags & IB_WC_WITH_SMAC)
+ memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
}
ah_attr->dlid = wc->slid;
@@ -344,10 +385,13 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = grh->sgid;
- ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
- &gid_index);
- if (ret)
- return ret;
+ if (!rdma_cap_eth_ah(device, port_num)) {
+ ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+ port_num, NULL,
+ &gid_index);
+ if (ret)
+ return ret;
+ }
ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
@@ -617,9 +661,7 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
enum ib_qp_attr_mask req_param[IB_QPT_MAX];
- enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
- enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -700,12 +742,6 @@ static const struct {
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
},
- .req_param_add_eth = {
- [IB_QPT_RC] = (IB_QP_SMAC),
- [IB_QPT_UC] = (IB_QP_SMAC),
- [IB_QPT_XRC_INI] = (IB_QP_SMAC),
- [IB_QPT_XRC_TGT] = (IB_QP_SMAC)
- },
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
@@ -726,21 +762,7 @@ static const struct {
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
},
- .opt_param_add_eth = {
- [IB_QPT_RC] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID),
- [IB_QPT_UC] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID),
- [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID),
- [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID)
- }
- }
+ },
},
[IB_QPS_RTR] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -962,13 +984,6 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
- if (ll == IB_LINK_LAYER_ETHERNET) {
- req_param |= qp_state_table[cur_state][next_state].
- req_param_add_eth[type];
- opt_param |= qp_state_table[cur_state][next_state].
- opt_param_add_eth[type];
- }
-
if ((mask & req_param) != req_param)
return 0;
@@ -979,40 +994,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
-int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
int ret = 0;
- union ib_gid sgid;
- if ((*qp_attr_mask & IB_QP_AV) &&
- (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) {
- ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num,
- qp_attr->ah_attr.grh.sgid_index, &sgid);
- if (ret)
- goto out;
+ if (*qp_attr_mask & IB_QP_AV) {
+ if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
+ qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
+ return -EINVAL;
+
+ if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
+ return 0;
+
if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
- rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
- if (!(*qp_attr_mask & IB_QP_VID))
- qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+ rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
+ qp_attr->ah_attr.dmac);
} else {
- ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
- qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
- if (ret)
- goto out;
- ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL);
- if (ret)
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+ int ifindex;
+
+ ret = ib_query_gid(qp->device,
+ qp_attr->ah_attr.port_num,
+ qp_attr->ah_attr.grh.sgid_index,
+ &sgid, &sgid_attr);
+
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
goto out;
+ }
+
+ ifindex = sgid_attr.ndev->ifindex;
+
+ ret = rdma_addr_find_dmac_by_grh(&sgid,
+ &qp_attr->ah_attr.grh.dgid,
+ qp_attr->ah_attr.dmac,
+ NULL, ifindex);
+
+ dev_put(sgid_attr.ndev);
}
- *qp_attr_mask |= IB_QP_SMAC;
- if (qp_attr->vlan_id < 0xFFFF)
- *qp_attr_mask |= IB_QP_VID;
}
out:
return ret;
}
-EXPORT_SYMBOL(ib_resolve_eth_l2_attrs);
+EXPORT_SYMBOL(ib_resolve_eth_dmac);
int ib_modify_qp(struct ib_qp *qp,
@@ -1021,7 +1048,7 @@ int ib_modify_qp(struct ib_qp *qp,
{
int ret;
- ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask);
+ ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
if (ret)
return ret;
@@ -1253,31 +1280,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
}
EXPORT_SYMBOL(ib_alloc_mr);
-struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
- int max_page_list_len)
-{
- struct ib_fast_reg_page_list *page_list;
-
- if (!device->alloc_fast_reg_page_list)
- return ERR_PTR(-ENOSYS);
-
- page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
-
- if (!IS_ERR(page_list)) {
- page_list->device = device;
- page_list->max_page_list_len = max_page_list_len;
- }
-
- return page_list;
-}
-EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
-
-void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
- page_list->device->free_fast_reg_page_list(page_list);
-}
-EXPORT_SYMBOL(ib_free_fast_reg_page_list);
-
/* Memory windows */
struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
@@ -1469,3 +1471,110 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
}
EXPORT_SYMBOL(ib_check_mr_status);
+
+/**
+ * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
+ * and set it the memory region.
+ * @mr: memory region
+ * @sg: dma mapped scatterlist
+ * @sg_nents: number of entries in sg
+ * @page_size: page vector desired page size
+ *
+ * Constraints:
+ * - The first sg element is allowed to have an offset.
+ * - Each sg element must be aligned to page_size (or physically
+ * contiguous to the previous element). In case an sg element has a
+ * non contiguous offset, the mapping prefix will not include it.
+ * - The last sg element is allowed to have length less than page_size.
+ * - If sg_nents total byte length exceeds the mr max_num_sge * page_size
+ * then only max_num_sg entries will be mapped.
+ *
+ * Returns the number of sg elements that were mapped to the memory region.
+ *
+ * After this completes successfully, the memory region
+ * is ready for registration.
+ */
+int ib_map_mr_sg(struct ib_mr *mr,
+ struct scatterlist *sg,
+ int sg_nents,
+ unsigned int page_size)
+{
+ if (unlikely(!mr->device->map_mr_sg))
+ return -ENOSYS;
+
+ mr->page_size = page_size;
+
+ return mr->device->map_mr_sg(mr, sg, sg_nents);
+}
+EXPORT_SYMBOL(ib_map_mr_sg);
+
+/**
+ * ib_sg_to_pages() - Convert the largest prefix of a sg list
+ * to a page vector
+ * @mr: memory region
+ * @sgl: dma mapped scatterlist
+ * @sg_nents: number of entries in sg
+ * @set_page: driver page assignment function pointer
+ *
+ * Core service helper for drivers to covert the largest
+ * prefix of given sg list to a page vector. The sg list
+ * prefix converted is the prefix that meet the requirements
+ * of ib_map_mr_sg.
+ *
+ * Returns the number of sg elements that were assigned to
+ * a page vector.
+ */
+int ib_sg_to_pages(struct ib_mr *mr,
+ struct scatterlist *sgl,
+ int sg_nents,
+ int (*set_page)(struct ib_mr *, u64))
+{
+ struct scatterlist *sg;
+ u64 last_end_dma_addr = 0, last_page_addr = 0;
+ unsigned int last_page_off = 0;
+ u64 page_mask = ~((u64)mr->page_size - 1);
+ int i;
+
+ mr->iova = sg_dma_address(&sgl[0]);
+ mr->length = 0;
+
+ for_each_sg(sgl, sg, sg_nents, i) {
+ u64 dma_addr = sg_dma_address(sg);
+ unsigned int dma_len = sg_dma_len(sg);
+ u64 end_dma_addr = dma_addr + dma_len;
+ u64 page_addr = dma_addr & page_mask;
+
+ if (i && page_addr != dma_addr) {
+ if (last_end_dma_addr != dma_addr) {
+ /* gap */
+ goto done;
+
+ } else if (last_page_off + dma_len <= mr->page_size) {
+ /* chunk this fragment with the last */
+ mr->length += dma_len;
+ last_end_dma_addr += dma_len;
+ last_page_off += dma_len;
+ continue;
+ } else {
+ /* map starting from the next page */
+ page_addr = last_page_addr + mr->page_size;
+ dma_len -= mr->page_size - last_page_off;
+ }
+ }
+
+ do {
+ if (unlikely(set_page(mr, page_addr)))
+ goto done;
+ page_addr += mr->page_size;
+ } while (page_addr < end_dma_addr);
+
+ mr->length += dma_len;
+ last_end_dma_addr = end_dma_addr;
+ last_page_addr = end_dma_addr & page_mask;
+ last_page_off = end_dma_addr & ~page_mask;
+ }
+
+done:
+ return i;
+}
+EXPORT_SYMBOL(ib_sg_to_pages);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index cf5474ae68ff..cfe404925a39 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -123,7 +123,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->opcode = IB_WC_LOCAL_INV;
break;
case T3_FAST_REGISTER:
- wc->opcode = IB_WC_FAST_REG_MR;
+ wc->opcode = IB_WC_REG_MR;
break;
default:
printk(KERN_ERR MOD "Unexpected opcode %d "
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 93308c45f298..c34725ca0bb4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -463,6 +463,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
return -EINVAL;
mhp = to_iwch_mr(ib_mr);
+ kfree(mhp->pages);
rhp = mhp->rhp;
mmid = mhp->attr.stag >> 8;
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
@@ -821,6 +822,12 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
if (!mhp)
goto err;
+ mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+ if (!mhp->pages) {
+ ret = -ENOMEM;
+ goto pl_err;
+ }
+
mhp->rhp = rhp;
ret = iwch_alloc_pbl(mhp, max_num_sg);
if (ret)
@@ -847,31 +854,34 @@ err3:
err2:
iwch_free_pbl(mhp);
err1:
+ kfree(mhp->pages);
+pl_err:
kfree(mhp);
err:
return ERR_PTR(ret);
}
-static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
- struct ib_device *device,
- int page_list_len)
+static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
{
- struct ib_fast_reg_page_list *page_list;
+ struct iwch_mr *mhp = to_iwch_mr(ibmr);
- page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64),
- GFP_KERNEL);
- if (!page_list)
- return ERR_PTR(-ENOMEM);
+ if (unlikely(mhp->npages == mhp->attr.pbl_size))
+ return -ENOMEM;
- page_list->page_list = (u64 *)(page_list + 1);
- page_list->max_page_list_len = page_list_len;
+ mhp->pages[mhp->npages++] = addr;
- return page_list;
+ return 0;
}
-static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list)
+static int iwch_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
{
- kfree(page_list);
+ struct iwch_mr *mhp = to_iwch_mr(ibmr);
+
+ mhp->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page);
}
static int iwch_destroy_qp(struct ib_qp *ib_qp)
@@ -1450,8 +1460,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
dev->ibdev.alloc_mr = iwch_alloc_mr;
- dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
- dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
+ dev->ibdev.map_mr_sg = iwch_map_mr_sg;
dev->ibdev.attach_mcast = iwch_multicast_attach;
dev->ibdev.detach_mcast = iwch_multicast_detach;
dev->ibdev.process_mad = iwch_process_mad;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 87c14b0c5ac0..2ac85b86a680 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -77,6 +77,8 @@ struct iwch_mr {
struct iwch_dev *rhp;
u64 kva;
struct tpt_attributes attr;
+ u64 *pages;
+ u32 npages;
};
typedef struct iwch_mw iwch_mw_handle;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index b57c0befd962..d0548fc6395e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -95,8 +95,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
wqe->write.reserved[0] = 0;
wqe->write.reserved[1] = 0;
wqe->write.reserved[2] = 0;
- wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
- wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+ wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
+ wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
plen = 4;
@@ -137,8 +137,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
wqe->read.local_inv = 0;
wqe->read.reserved[0] = 0;
wqe->read.reserved[1] = 0;
- wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
- wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
+ wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey);
+ wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr);
wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
@@ -146,27 +146,28 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
- u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
+static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
+ u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
{
+ struct iwch_mr *mhp = to_iwch_mr(wr->mr);
int i;
__be64 *p;
- if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH)
+ if (mhp->npages > T3_MAX_FASTREG_DEPTH)
return -EINVAL;
*wr_cnt = 1;
- wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
- wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length);
- wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
+ wqe->fastreg.stag = cpu_to_be32(wr->key);
+ wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length);
+ wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
wqe->fastreg.va_base_lo_fbo =
- cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff);
+ cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
wqe->fastreg.page_type_perms = cpu_to_be32(
- V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) |
- V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) |
+ V_FR_PAGE_COUNT(mhp->npages) |
+ V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) |
V_FR_TYPE(TPT_VATO) |
- V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags)));
+ V_FR_PERMS(iwch_ib_to_tpt_access(wr->access)));
p = &wqe->fastreg.pbl_addrs[0];
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) {
+ for (i = 0; i < mhp->npages; i++, p++) {
/* If we need a 2nd WR, then set it up */
if (i == T3_MAX_FASTREG_FRAG) {
@@ -175,14 +176,14 @@ static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
Q_PTR2IDX((wq->wptr+1), wq->size_log2));
build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
Q_GENBIT(wq->wptr + 1, wq->size_log2),
- 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG,
+ 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG,
T3_EOP);
p = &wqe->pbl_frag.pbl_addrs[0];
}
- *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
+ *p = cpu_to_be64((u64)mhp->pages[i]);
}
- *flit_cnt = 5 + wr->wr.fast_reg.page_list_len;
+ *flit_cnt = 5 + mhp->npages;
if (*flit_cnt > 15)
*flit_cnt = 15;
return 0;
@@ -414,10 +415,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (!qhp->wq.oldest_read)
qhp->wq.oldest_read = sqp;
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
t3_wr_opcode = T3_WR_FASTREG;
- err = build_fastreg(wqe, wr, &t3_wr_flit_cnt,
- &wr_cnt, &qhp->wq);
+ err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt,
+ &wr_cnt, &qhp->wq);
break;
case IB_WR_LOCAL_INV:
if (wr->send_flags & IB_SEND_FENCE)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index debc39d2cbc2..c9cffced00ca 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -632,22 +632,18 @@ static void best_mtu(const unsigned short *mtus, unsigned short mtu,
static int send_connect(struct c4iw_ep *ep)
{
- struct cpl_act_open_req *req;
- struct cpl_t5_act_open_req *t5_req;
- struct cpl_act_open_req6 *req6;
- struct cpl_t5_act_open_req6 *t5_req6;
+ struct cpl_act_open_req *req = NULL;
+ struct cpl_t5_act_open_req *t5req = NULL;
+ struct cpl_t6_act_open_req *t6req = NULL;
+ struct cpl_act_open_req6 *req6 = NULL;
+ struct cpl_t5_act_open_req6 *t5req6 = NULL;
+ struct cpl_t6_act_open_req6 *t6req6 = NULL;
struct sk_buff *skb;
u64 opt0;
u32 opt2;
unsigned int mtu_idx;
int wscale;
- int wrlen;
- int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
- sizeof(struct cpl_act_open_req) :
- sizeof(struct cpl_t5_act_open_req);
- int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
- sizeof(struct cpl_act_open_req6) :
- sizeof(struct cpl_t5_act_open_req6);
+ int win, sizev4, sizev6, wrlen;
struct sockaddr_in *la = (struct sockaddr_in *)
&ep->com.mapped_local_addr;
struct sockaddr_in *ra = (struct sockaddr_in *)
@@ -656,8 +652,28 @@ static int send_connect(struct c4iw_ep *ep)
&ep->com.mapped_local_addr;
struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
&ep->com.mapped_remote_addr;
- int win;
int ret;
+ enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
+ u32 isn = (prandom_u32() & ~7UL) - 1;
+
+ switch (CHELSIO_CHIP_VERSION(adapter_type)) {
+ case CHELSIO_T4:
+ sizev4 = sizeof(struct cpl_act_open_req);
+ sizev6 = sizeof(struct cpl_act_open_req6);
+ break;
+ case CHELSIO_T5:
+ sizev4 = sizeof(struct cpl_t5_act_open_req);
+ sizev6 = sizeof(struct cpl_t5_act_open_req6);
+ break;
+ case CHELSIO_T6:
+ sizev4 = sizeof(struct cpl_t6_act_open_req);
+ sizev6 = sizeof(struct cpl_t6_act_open_req6);
+ break;
+ default:
+ pr_err("T%d Chip is not supported\n",
+ CHELSIO_CHIP_VERSION(adapter_type));
+ return -EINVAL;
+ }
wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
roundup(sizev4, 16) :
@@ -706,7 +722,10 @@ static int send_connect(struct c4iw_ep *ep)
opt2 |= SACK_EN_F;
if (wscale && enable_tcp_window_scaling)
opt2 |= WND_SCALE_EN_F;
- if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
+ if (peer2peer)
+ isn += 4;
+
opt2 |= T5_OPT_2_VALID_F;
opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
opt2 |= T5_ISS_F;
@@ -718,102 +737,109 @@ static int send_connect(struct c4iw_ep *ep)
t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
- if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
- if (ep->com.remote_addr.ss_family == AF_INET) {
- req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
+ if (ep->com.remote_addr.ss_family == AF_INET) {
+ switch (CHELSIO_CHIP_VERSION(adapter_type)) {
+ case CHELSIO_T4:
+ req = (struct cpl_act_open_req *)skb_put(skb, wrlen);
INIT_TP_WR(req, 0);
- OPCODE_TID(req) = cpu_to_be32(
- MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
- ((ep->rss_qid << 14) | ep->atid)));
- req->local_port = la->sin_port;
- req->peer_port = ra->sin_port;
- req->local_ip = la->sin_addr.s_addr;
- req->peer_ip = ra->sin_addr.s_addr;
- req->opt0 = cpu_to_be64(opt0);
+ break;
+ case CHELSIO_T5:
+ t5req = (struct cpl_t5_act_open_req *)skb_put(skb,
+ wrlen);
+ INIT_TP_WR(t5req, 0);
+ req = (struct cpl_act_open_req *)t5req;
+ break;
+ case CHELSIO_T6:
+ t6req = (struct cpl_t6_act_open_req *)skb_put(skb,
+ wrlen);
+ INIT_TP_WR(t6req, 0);
+ req = (struct cpl_act_open_req *)t6req;
+ t5req = (struct cpl_t5_act_open_req *)t6req;
+ break;
+ default:
+ pr_err("T%d Chip is not supported\n",
+ CHELSIO_CHIP_VERSION(adapter_type));
+ ret = -EINVAL;
+ goto clip_release;
+ }
+
+ OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
+ ((ep->rss_qid<<14) | ep->atid)));
+ req->local_port = la->sin_port;
+ req->peer_port = ra->sin_port;
+ req->local_ip = la->sin_addr.s_addr;
+ req->peer_ip = ra->sin_addr.s_addr;
+ req->opt0 = cpu_to_be64(opt0);
+
+ if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
req->params = cpu_to_be32(cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t));
req->opt2 = cpu_to_be32(opt2);
} else {
+ t5req->params = cpu_to_be64(FILTER_TUPLE_V(
+ cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t)));
+ t5req->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
+ t5req->opt2 = cpu_to_be32(opt2);
+ }
+ } else {
+ switch (CHELSIO_CHIP_VERSION(adapter_type)) {
+ case CHELSIO_T4:
req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
-
INIT_TP_WR(req6, 0);
- OPCODE_TID(req6) = cpu_to_be32(
- MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
- ((ep->rss_qid<<14)|ep->atid)));
- req6->local_port = la6->sin6_port;
- req6->peer_port = ra6->sin6_port;
- req6->local_ip_hi = *((__be64 *)
- (la6->sin6_addr.s6_addr));
- req6->local_ip_lo = *((__be64 *)
- (la6->sin6_addr.s6_addr + 8));
- req6->peer_ip_hi = *((__be64 *)
- (ra6->sin6_addr.s6_addr));
- req6->peer_ip_lo = *((__be64 *)
- (ra6->sin6_addr.s6_addr + 8));
- req6->opt0 = cpu_to_be64(opt0);
+ break;
+ case CHELSIO_T5:
+ t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb,
+ wrlen);
+ INIT_TP_WR(t5req6, 0);
+ req6 = (struct cpl_act_open_req6 *)t5req6;
+ break;
+ case CHELSIO_T6:
+ t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb,
+ wrlen);
+ INIT_TP_WR(t6req6, 0);
+ req6 = (struct cpl_act_open_req6 *)t6req6;
+ t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
+ break;
+ default:
+ pr_err("T%d Chip is not supported\n",
+ CHELSIO_CHIP_VERSION(adapter_type));
+ ret = -EINVAL;
+ goto clip_release;
+ }
+
+ OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+ ((ep->rss_qid<<14)|ep->atid)));
+ req6->local_port = la6->sin6_port;
+ req6->peer_port = ra6->sin6_port;
+ req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
+ req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
+ req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
+ req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
+ req6->opt0 = cpu_to_be64(opt0);
+
+ if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
req6->params = cpu_to_be32(cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t));
req6->opt2 = cpu_to_be32(opt2);
- }
- } else {
- u32 isn = (prandom_u32() & ~7UL) - 1;
-
- if (peer2peer)
- isn += 4;
-
- if (ep->com.remote_addr.ss_family == AF_INET) {
- t5_req = (struct cpl_t5_act_open_req *)
- skb_put(skb, wrlen);
- INIT_TP_WR(t5_req, 0);
- OPCODE_TID(t5_req) = cpu_to_be32(
- MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
- ((ep->rss_qid << 14) | ep->atid)));
- t5_req->local_port = la->sin_port;
- t5_req->peer_port = ra->sin_port;
- t5_req->local_ip = la->sin_addr.s_addr;
- t5_req->peer_ip = ra->sin_addr.s_addr;
- t5_req->opt0 = cpu_to_be64(opt0);
- t5_req->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t)));
- t5_req->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__,
- be32_to_cpu(t5_req->rsvd));
- t5_req->opt2 = cpu_to_be32(opt2);
} else {
- t5_req6 = (struct cpl_t5_act_open_req6 *)
- skb_put(skb, wrlen);
- INIT_TP_WR(t5_req6, 0);
- OPCODE_TID(t5_req6) = cpu_to_be32(
- MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
- ((ep->rss_qid<<14)|ep->atid)));
- t5_req6->local_port = la6->sin6_port;
- t5_req6->peer_port = ra6->sin6_port;
- t5_req6->local_ip_hi = *((__be64 *)
- (la6->sin6_addr.s6_addr));
- t5_req6->local_ip_lo = *((__be64 *)
- (la6->sin6_addr.s6_addr + 8));
- t5_req6->peer_ip_hi = *((__be64 *)
- (ra6->sin6_addr.s6_addr));
- t5_req6->peer_ip_lo = *((__be64 *)
- (ra6->sin6_addr.s6_addr + 8));
- t5_req6->opt0 = cpu_to_be64(opt0);
- t5_req6->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
+ t5req6->params = cpu_to_be64(FILTER_TUPLE_V(
+ cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t)));
- t5_req6->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__,
- be32_to_cpu(t5_req6->rsvd));
- t5_req6->opt2 = cpu_to_be32(opt2);
+ t5req6->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
+ t5req6->opt2 = cpu_to_be32(opt2);
}
}
set_bit(ACT_OPEN_REQ, &ep->com.history);
ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+clip_release:
if (ret && ep->com.remote_addr.ss_family == AF_INET6)
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&la6->sin6_addr.s6_addr, 1);
@@ -1196,6 +1222,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
if ((status == 0) || (status == -ECONNREFUSED)) {
if (!ep->tried_with_mpa_v1) {
/* this means MPA_v2 is used */
+ event.ord = ep->ird;
+ event.ird = ep->ord;
event.private_data_len = ep->plen -
sizeof(struct mpa_v2_conn_params);
event.private_data = ep->mpa_pkt +
@@ -1203,6 +1231,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
sizeof(struct mpa_v2_conn_params);
} else {
/* this means MPA_v1 is used */
+ event.ord = cur_max_read_depth(ep->com.dev);
+ event.ird = cur_max_read_depth(ep->com.dev);
event.private_data_len = ep->plen;
event.private_data = ep->mpa_pkt +
sizeof(struct mpa_message);
@@ -1265,8 +1295,8 @@ static void established_upcall(struct c4iw_ep *ep)
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
- event.ird = ep->ird;
- event.ord = ep->ord;
+ event.ird = ep->ord;
+ event.ord = ep->ird;
if (ep->com.cm_id) {
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -1898,7 +1928,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
struct dst_entry *dst, struct c4iw_dev *cdev,
- bool clear_mpa_v1)
+ bool clear_mpa_v1, enum chip_type adapter_type)
{
struct neighbour *n;
int err, step;
@@ -1933,7 +1963,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
goto out;
ep->mtu = pdev->mtu;
ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+ ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
+ cxgb4_port_viid(pdev));
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -1952,7 +1983,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
goto out;
ep->mtu = dst_mtu(dst);
ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+ ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
+ cxgb4_port_viid(pdev));
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2025,7 +2057,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
err = -EHOSTUNREACH;
goto fail3;
}
- err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false);
+ err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
+ ep->com.dev->rdev.lldi.adapter_type);
if (err) {
pr_err("%s - cannot alloc l2e.\n", __func__);
goto fail4;
@@ -2213,13 +2246,14 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
int wscale;
struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
int win;
+ enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(skb_cloned(skb));
skb_get(skb);
rpl = cplhdr(skb);
- if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ if (!is_t4(adapter_type)) {
skb_trim(skb, roundup(sizeof(*rpl5), 16));
rpl5 = (void *)rpl;
INIT_TP_WR(rpl5, ep->hwtid);
@@ -2266,12 +2300,16 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
const struct tcphdr *tcph;
u32 hlen = ntohl(req->hdr_len);
- tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
- IP_HDR_LEN_G(hlen);
+ if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
+ tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
+ IP_HDR_LEN_G(hlen);
+ else
+ tcph = (const void *)(req + 1) +
+ T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
if (tcph->ece && tcph->cwr)
opt2 |= CCTRL_ECN_V(1);
}
- if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
u32 isn = (prandom_u32() & ~7UL) - 1;
opt2 |= T5_OPT_2_VALID_F;
opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
@@ -2302,12 +2340,16 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
return;
}
-static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype,
- __u8 *local_ip, __u8 *peer_ip,
+static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
+ int *iptype, __u8 *local_ip, __u8 *peer_ip,
__be16 *local_port, __be16 *peer_port)
{
- int eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- int ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+ int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+ ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+ T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+ int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+ IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+ T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
struct tcphdr *tcp = (struct tcphdr *)
@@ -2362,7 +2404,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port);
+ get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
+ local_ip, peer_ip, &local_port, &peer_port);
/* Find output route */
if (iptype == 4) {
@@ -2397,7 +2440,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- err = import_ep(child_ep, iptype, peer_ip, dst, dev, false);
+ err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
+ parent_ep->com.dev->rdev.lldi.adapter_type);
if (err) {
printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
__func__);
@@ -2929,7 +2973,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
} else {
if (peer2peer &&
(ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
- (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ord == 0)
+ (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
ep->ird = 1;
}
@@ -3189,7 +3233,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto fail2;
}
- err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
+ err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
+ ep->com.dev->rdev.lldi.adapter_type);
if (err) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
goto fail3;
@@ -3260,6 +3305,10 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
sin->sin_addr.s_addr, sin->sin_port, 0,
ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
if (err == -EBUSY) {
+ if (c4iw_fatal_error(&ep->com.dev->rdev)) {
+ err = -EIO;
+ break;
+ }
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(usecs_to_jiffies(100));
}
@@ -3593,20 +3642,23 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
{
- u32 l2info;
- u16 vlantag, len, hdr_len, eth_hdr_len;
+ __be32 l2info;
+ __be16 hdr_len, vlantag, len;
+ u16 eth_hdr_len;
+ int tcp_hdr_len, ip_hdr_len;
u8 intf;
struct cpl_rx_pkt *cpl = cplhdr(skb);
struct cpl_pass_accept_req *req;
struct tcp_options_received tmp_opt;
struct c4iw_dev *dev;
+ enum chip_type type;
dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
/* Store values from cpl_rx_pkt in temporary location. */
- vlantag = (__force u16) cpl->vlan;
- len = (__force u16) cpl->len;
- l2info = (__force u32) cpl->l2info;
- hdr_len = (__force u16) cpl->hdr_len;
+ vlantag = cpl->vlan;
+ len = cpl->len;
+ l2info = cpl->l2info;
+ hdr_len = cpl->hdr_len;
intf = cpl->iff;
__skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
@@ -3623,20 +3675,28 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
memset(req, 0, sizeof(*req));
req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
SYN_MAC_IDX_V(RX_MACIDX_G(
- (__force int) htonl(l2info))) |
+ be32_to_cpu(l2info))) |
SYN_XACT_MATCH_F);
- eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
- RX_ETHHDR_LEN_G((__force int)htonl(l2info)) :
- RX_T5_ETHHDR_LEN_G((__force int)htonl(l2info));
- req->hdr_len = cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(
- (__force int) htonl(l2info))) |
- TCP_HDR_LEN_V(RX_TCPHDR_LEN_G(
- (__force int) htons(hdr_len))) |
- IP_HDR_LEN_V(RX_IPHDR_LEN_G(
- (__force int) htons(hdr_len))) |
- ETH_HDR_LEN_V(RX_ETHHDR_LEN_G(eth_hdr_len)));
- req->vlan = (__force __be16) vlantag;
- req->len = (__force __be16) len;
+ type = dev->rdev.lldi.adapter_type;
+ tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
+ ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
+ req->hdr_len =
+ cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
+ if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
+ eth_hdr_len = is_t4(type) ?
+ RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
+ RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
+ req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
+ IP_HDR_LEN_V(ip_hdr_len) |
+ ETH_HDR_LEN_V(eth_hdr_len));
+ } else { /* T6 and later */
+ eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
+ req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
+ T6_IP_HDR_LEN_V(ip_hdr_len) |
+ T6_ETH_HDR_LEN_V(eth_hdr_len));
+ }
+ req->vlan = vlantag;
+ req->len = len;
req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
PASS_OPEN_TOS_V(tos));
req->tcpopt.mss = htons(tmp_opt.mss_clamp);
@@ -3755,9 +3815,22 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
- RX_ETHHDR_LEN_G(htonl(cpl->l2info)) :
- RX_T5_ETHHDR_LEN_G(htonl(cpl->l2info));
+ switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
+ case CHELSIO_T4:
+ eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
+ break;
+ case CHELSIO_T5:
+ eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
+ break;
+ case CHELSIO_T6:
+ eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
+ break;
+ default:
+ pr_err("T%d Chip is not supported\n",
+ CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
+ goto reject;
+ }
+
if (eth_hdr_len == ETH_HLEN) {
eh = (struct ethhdr *)(req + 1);
iph = (struct iphdr *)(eh + 1);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 92d518382a9f..de9cd6901752 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -752,7 +752,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->opcode = IB_WC_LOCAL_INV;
break;
case FW_RI_FAST_REGISTER:
- wc->opcode = IB_WC_FAST_REG_MR;
+ wc->opcode = IB_WC_REG_MR;
break;
default:
printk(KERN_ERR MOD "Unexpected opcode %d "
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 1a297391b54c..58fce1742b8d 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -962,12 +962,12 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.lldi.sge_egrstatuspagesize;
/*
- * For T5 devices, we map all of BAR2 with WC.
+ * For T5/T6 devices, we map all of BAR2 with WC.
* For T4 devices with onchip qp mem, we map only that part
* of BAR2 with WC.
*/
devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
- if (is_t5(devp->rdev.lldi.adapter_type)) {
+ if (!is_t4(devp->rdev.lldi.adapter_type)) {
devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
pci_resource_len(devp->rdev.lldi.pdev, 2));
if (!devp->rdev.bar2_kva) {
@@ -1267,11 +1267,9 @@ static int enable_qp_db(int id, void *p, void *data)
static void resume_rc_qp(struct c4iw_qp *qp)
{
spin_lock(&qp->lock);
- t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
- is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL);
qp->wq.sq.wq_pidx_inc = 0;
- t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
- is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL);
qp->wq.rq.wq_pidx_inc = 0;
spin_unlock(&qp->lock);
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index c7bb38c931a5..00e55faa086a 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -386,6 +386,10 @@ struct c4iw_mr {
struct c4iw_dev *rhp;
u64 kva;
struct tpt_attributes attr;
+ u64 *mpl;
+ dma_addr_t mpl_addr;
+ u32 max_mpl_len;
+ u32 mpl_len;
};
static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
@@ -405,20 +409,6 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
return container_of(ibmw, struct c4iw_mw, ibmw);
}
-struct c4iw_fr_page_list {
- struct ib_fast_reg_page_list ibpl;
- DEFINE_DMA_UNMAP_ADDR(mapping);
- dma_addr_t dma_addr;
- struct c4iw_dev *dev;
- int pll_len;
-};
-
-static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
- struct ib_fast_reg_page_list *ibpl)
-{
- return container_of(ibpl, struct c4iw_fr_page_list, ibpl);
-}
-
struct c4iw_cq {
struct ib_cq ibcq;
struct c4iw_dev *rhp;
@@ -966,13 +956,12 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
void c4iw_qp_add_ref(struct ib_qp *qp);
void c4iw_qp_rem_ref(struct ib_qp *qp);
-void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list);
-struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
- struct ib_device *device,
- int page_list_len);
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
+int c4iw_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
int c4iw_dealloc_mw(struct ib_mw *mw);
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 026b91ebd5e2..e1629ab58db7 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
if (i == (num_wqe-1)) {
req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
FW_WR_COMPL_F);
- req->wr.wr_lo = (__force __be64)&wr_wait;
+ req->wr.wr_lo = (__force __be64)(unsigned long)&wr_wait;
} else
req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR));
req->wr.wr_mid = cpu_to_be32(
@@ -863,6 +863,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
u32 mmid;
u32 stag = 0;
int ret = 0;
+ int length = roundup(max_num_sg * sizeof(u64), 32);
if (mr_type != IB_MR_TYPE_MEM_REG ||
max_num_sg > t4_max_fr_depth(use_dsgl))
@@ -876,6 +877,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
goto err;
}
+ mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev,
+ length, &mhp->mpl_addr, GFP_KERNEL);
+ if (!mhp->mpl) {
+ ret = -ENOMEM;
+ goto err_mpl;
+ }
+ mhp->max_mpl_len = length;
+
mhp->rhp = rhp;
ret = alloc_pbl(mhp, max_num_sg);
if (ret)
@@ -905,54 +914,35 @@ err2:
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
err1:
+ dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
+ mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
+err_mpl:
kfree(mhp);
err:
return ERR_PTR(ret);
}
-struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
- int page_list_len)
+static int c4iw_set_page(struct ib_mr *ibmr, u64 addr)
{
- struct c4iw_fr_page_list *c4pl;
- struct c4iw_dev *dev = to_c4iw_dev(device);
- dma_addr_t dma_addr;
- int pll_len = roundup(page_list_len * sizeof(u64), 32);
-
- c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL);
- if (!c4pl)
- return ERR_PTR(-ENOMEM);
+ struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
- c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev,
- pll_len, &dma_addr,
- GFP_KERNEL);
- if (!c4pl->ibpl.page_list) {
- kfree(c4pl);
- return ERR_PTR(-ENOMEM);
- }
- dma_unmap_addr_set(c4pl, mapping, dma_addr);
- c4pl->dma_addr = dma_addr;
- c4pl->dev = dev;
- c4pl->pll_len = pll_len;
+ if (unlikely(mhp->mpl_len == mhp->max_mpl_len))
+ return -ENOMEM;
- PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
- __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
- &c4pl->dma_addr);
+ mhp->mpl[mhp->mpl_len++] = addr;
- return &c4pl->ibpl;
+ return 0;
}
-void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)
+int c4iw_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
{
- struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl);
+ struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
- PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
- __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
- &c4pl->dma_addr);
+ mhp->mpl_len = 0;
- dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev,
- c4pl->pll_len,
- c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping));
- kfree(c4pl);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page);
}
int c4iw_dereg_mr(struct ib_mr *ib_mr)
@@ -970,6 +960,9 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
rhp = mhp->rhp;
mmid = mhp->attr.stag >> 8;
remove_handle(rhp, &rhp->mmidr, mmid);
+ if (mhp->mpl)
+ dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
+ mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
if (mhp->attr.pbl_size)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 7746113552e7..0a7d99818b17 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -209,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if (addr >= rdev->oc_mw_pa)
vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
else {
- if (is_t5(rdev->lldi.adapter_type))
+ if (!is_t4(rdev->lldi.adapter_type))
vma->vm_page_prot =
t4_pgprot_wc(vma->vm_page_prot);
else
@@ -557,8 +557,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.bind_mw = c4iw_bind_mw;
dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
dev->ibdev.alloc_mr = c4iw_alloc_mr;
- dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
- dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl;
+ dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
dev->ibdev.attach_mcast = c4iw_multicast_attach;
dev->ibdev.detach_mcast = c4iw_multicast_detach;
dev->ibdev.process_mad = c4iw_process_mad;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 6517e1208ccb..aa515afee724 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -528,8 +528,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
if (wr->num_sge > T4_MAX_SEND_SGE)
return -EINVAL;
wqe->write.r2 = 0;
- wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
- wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+ wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
+ wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
if (wr->num_sge) {
if (wr->send_flags & IB_SEND_INLINE) {
ret = build_immd(sq, wqe->write.u.immd_src, wr,
@@ -566,10 +566,10 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
if (wr->num_sge > 1)
return -EINVAL;
if (wr->num_sge) {
- wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey);
- wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr
+ wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
+ wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
>> 32));
- wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr);
+ wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr);
wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr
@@ -605,47 +605,41 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
return 0;
}
-static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
- struct ib_send_wr *wr, u8 *len16, u8 t5dev)
+static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
+ struct ib_reg_wr *wr, u8 *len16, u8 t5dev)
{
-
+ struct c4iw_mr *mhp = to_c4iw_mr(wr->mr);
struct fw_ri_immd *imdp;
__be64 *p;
int i;
- int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32);
+ int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
int rem;
- if (wr->wr.fast_reg.page_list_len >
- t4_max_fr_depth(use_dsgl))
+ if (mhp->mpl_len > t4_max_fr_depth(use_dsgl))
return -EINVAL;
wqe->fr.qpbinde_to_dcacpu = 0;
- wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12;
+ wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12;
wqe->fr.addr_type = FW_RI_VA_BASED_TO;
- wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags);
+ wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access);
wqe->fr.len_hi = 0;
- wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length);
- wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
- wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
- wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start &
+ wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length);
+ wqe->fr.stag = cpu_to_be32(wr->key);
+ wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
+ wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
0xffffffff);
if (t5dev && use_dsgl && (pbllen > max_fr_immd)) {
- struct c4iw_fr_page_list *c4pl =
- to_c4iw_fr_page_list(wr->wr.fast_reg.page_list);
struct fw_ri_dsgl *sglp;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
- wr->wr.fast_reg.page_list->page_list[i] = (__force u64)
- cpu_to_be64((u64)
- wr->wr.fast_reg.page_list->page_list[i]);
- }
+ for (i = 0; i < mhp->mpl_len; i++)
+ mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]);
sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
sglp->op = FW_RI_DATA_DSGL;
sglp->r1 = 0;
sglp->nsge = cpu_to_be16(1);
- sglp->addr0 = cpu_to_be64(c4pl->dma_addr);
+ sglp->addr0 = cpu_to_be64(mhp->mpl_addr);
sglp->len0 = cpu_to_be32(pbllen);
*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
@@ -657,9 +651,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
imdp->immdlen = cpu_to_be32(pbllen);
p = (__be64 *)(imdp + 1);
rem = pbllen;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
- *p = cpu_to_be64(
- (u64)wr->wr.fast_reg.page_list->page_list[i]);
+ for (i = 0; i < mhp->mpl_len; i++) {
+ *p = cpu_to_be64((u64)mhp->mpl[i]);
rem -= sizeof(*p);
if (++p == (__be64 *)&sq->queue[sq->size])
p = (__be64 *)sq->queue;
@@ -712,8 +705,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
spin_lock_irqsave(&qhp->rhp->lock, flags);
spin_lock(&qhp->lock);
if (qhp->rhp->db_state == NORMAL)
- t4_ring_sq_db(&qhp->wq, inc,
- is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ t4_ring_sq_db(&qhp->wq, inc, NULL);
else {
add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
qhp->wq.sq.wq_pidx_inc += inc;
@@ -730,8 +722,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
spin_lock_irqsave(&qhp->rhp->lock, flags);
spin_lock(&qhp->lock);
if (qhp->rhp->db_state == NORMAL)
- t4_ring_rq_db(&qhp->wq, inc,
- is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ t4_ring_rq_db(&qhp->wq, inc, NULL);
else {
add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
qhp->wq.rq.wq_pidx_inc += inc;
@@ -813,13 +804,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (!qhp->wq.sq.oldest_read)
qhp->wq.sq.oldest_read = swsqe;
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
fw_opcode = FW_RI_FR_NSMR_WR;
swsqe->opcode = FW_RI_FAST_REGISTER;
- err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16,
- is_t5(
- qhp->rhp->rdev.lldi.adapter_type) ?
- 1 : 0);
+ err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16,
+ is_t5(
+ qhp->rhp->rdev.lldi.adapter_type) ?
+ 1 : 0);
break;
case IB_WR_LOCAL_INV:
if (wr->send_flags & IB_SEND_FENCE)
@@ -860,8 +851,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
}
if (!qhp->rhp->rdev.status_page->db_off) {
- t4_ring_sq_db(&qhp->wq, idx,
- is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ t4_ring_sq_db(&qhp->wq, idx, wqe);
spin_unlock_irqrestore(&qhp->lock, flag);
} else {
spin_unlock_irqrestore(&qhp->lock, flag);
@@ -934,8 +924,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
num_wrs--;
}
if (!qhp->rhp->rdev.status_page->db_off) {
- t4_ring_rq_db(&qhp->wq, idx,
- is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ t4_ring_rq_db(&qhp->wq, idx, wqe);
spin_unlock_irqrestore(&qhp->lock, flag);
} else {
spin_unlock_irqrestore(&qhp->lock, flag);
@@ -1875,7 +1864,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attrs.rq_db_inc = attr->rq_psn;
mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
- if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
+ if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
(mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
return -EINVAL;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 274a7ab13bef..1092a2d1f607 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -455,8 +455,7 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src)
}
}
-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
- union t4_wr *wqe)
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
{
/* Flush host queue memory writes. */
@@ -482,7 +481,7 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
writel(QID_V(wq->sq.qid) | PIDX_V(inc), wq->db);
}
-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
union t4_recv_wr *wqe)
{
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 1688a17de4fe..86af71351d9a 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -76,7 +76,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
struct mlx4_dev *dev = ibdev->dev;
int is_mcast = 0;
struct in6_addr in6;
- u16 vlan_tag;
+ u16 vlan_tag = 0xffff;
+ union ib_gid sgid;
+ struct ib_gid_attr gid_attr;
+ int ret;
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6)) {
@@ -85,7 +88,17 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
} else {
memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
}
- vlan_tag = ah_attr->vlan_id;
+ ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
+ ah_attr->grh.sgid_index, &sgid, &gid_attr);
+ if (ret)
+ return ERR_PTR(ret);
+ memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+ if (gid_attr.ndev) {
+ if (is_vlan_dev(gid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
+ memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
+ dev_put(gid_attr.ndev);
+ }
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 5fd49f9435f9..b88fc8f5ab18 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -818,7 +818,7 @@ repoll:
wc->opcode = IB_WC_LSO;
break;
case MLX4_OPCODE_FMR:
- wc->opcode = IB_WC_FAST_REG_MR;
+ wc->opcode = IB_WC_REG_MR;
break;
case MLX4_OPCODE_LOCAL_INVAL:
wc->opcode = IB_WC_LOCAL_INV;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 1cd75ff02251..870e56b6b25f 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -457,7 +457,8 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
struct ib_grh *grh, struct ib_mad *mad)
{
struct ib_sge list;
- struct ib_send_wr wr, *bad_wr;
+ struct ib_ud_wr wr;
+ struct ib_send_wr *bad_wr;
struct mlx4_ib_demux_pv_ctx *tun_ctx;
struct mlx4_ib_demux_pv_qp *tun_qp;
struct mlx4_rcv_tunnel_mad *tun_mad;
@@ -582,18 +583,18 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
list.length = sizeof (struct mlx4_rcv_tunnel_mad);
list.lkey = tun_ctx->pd->local_dma_lkey;
- wr.wr.ud.ah = ah;
- wr.wr.ud.port_num = port;
- wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
- wr.wr.ud.remote_qpn = dqpn;
- wr.next = NULL;
- wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
- wr.sg_list = &list;
- wr.num_sge = 1;
- wr.opcode = IB_WR_SEND;
- wr.send_flags = IB_SEND_SIGNALED;
-
- ret = ib_post_send(src_qp, &wr, &bad_wr);
+ wr.ah = ah;
+ wr.port_num = port;
+ wr.remote_qkey = IB_QP_SET_QKEY;
+ wr.remote_qpn = dqpn;
+ wr.wr.next = NULL;
+ wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
+ wr.wr.sg_list = &list;
+ wr.wr.num_sge = 1;
+ wr.wr.opcode = IB_WR_SEND;
+ wr.wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
out:
if (ret)
ib_destroy_ah(ah);
@@ -824,18 +825,29 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
{
struct mlx4_counter counter_stats;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- int err;
+ struct counter_index *tmp_counter;
+ int err = IB_MAD_RESULT_FAILURE, stats_avail = 0;
if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
return -EINVAL;
memset(&counter_stats, 0, sizeof(counter_stats));
- err = mlx4_get_counter_stats(dev->dev,
- dev->counters[port_num - 1].index,
- &counter_stats, 0);
- if (err)
- err = IB_MAD_RESULT_FAILURE;
- else {
+ mutex_lock(&dev->counters_table[port_num - 1].mutex);
+ list_for_each_entry(tmp_counter,
+ &dev->counters_table[port_num - 1].counters_list,
+ list) {
+ err = mlx4_get_counter_stats(dev->dev,
+ tmp_counter->index,
+ &counter_stats, 0);
+ if (err) {
+ err = IB_MAD_RESULT_FAILURE;
+ stats_avail = 0;
+ break;
+ }
+ stats_avail = 1;
+ }
+ mutex_unlock(&dev->counters_table[port_num - 1].mutex);
+ if (stats_avail) {
memset(out_mad->data, 0, sizeof out_mad->data);
switch (counter_stats.counter_mode & 0xf) {
case 0:
@@ -1172,10 +1184,11 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index,
u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
- u8 *s_mac, struct ib_mad *mad)
+ u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
{
struct ib_sge list;
- struct ib_send_wr wr, *bad_wr;
+ struct ib_ud_wr wr;
+ struct ib_send_wr *bad_wr;
struct mlx4_ib_demux_pv_ctx *sqp_ctx;
struct mlx4_ib_demux_pv_qp *sqp;
struct mlx4_mad_snd_buf *sqp_mad;
@@ -1246,22 +1259,25 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
list.length = sizeof (struct mlx4_mad_snd_buf);
list.lkey = sqp_ctx->pd->local_dma_lkey;
- wr.wr.ud.ah = ah;
- wr.wr.ud.port_num = port;
- wr.wr.ud.pkey_index = wire_pkey_ix;
- wr.wr.ud.remote_qkey = qkey;
- wr.wr.ud.remote_qpn = remote_qpn;
- wr.next = NULL;
- wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
- wr.sg_list = &list;
- wr.num_sge = 1;
- wr.opcode = IB_WR_SEND;
- wr.send_flags = IB_SEND_SIGNALED;
+ wr.ah = ah;
+ wr.port_num = port;
+ wr.pkey_index = wire_pkey_ix;
+ wr.remote_qkey = qkey;
+ wr.remote_qpn = remote_qpn;
+ wr.wr.next = NULL;
+ wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
+ wr.wr.sg_list = &list;
+ wr.wr.num_sge = 1;
+ wr.wr.opcode = IB_WR_SEND;
+ wr.wr.send_flags = IB_SEND_SIGNALED;
if (s_mac)
memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
+ if (vlan_id < 0x1000)
+ vlan_id |= (attr->sl & 7) << 13;
+ to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
- ret = ib_post_send(send_qp, &wr, &bad_wr);
+ ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
out:
if (ret)
ib_destroy_ah(ah);
@@ -1295,6 +1311,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
u8 *slave_id;
int slave;
int port;
+ u16 vlan_id;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1383,10 +1400,10 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
- ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
+ vlan_id = be16_to_cpu(tunnel->hdr.vlan);
/* if slave have default vlan use it */
mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
- &ah_attr.vlan_id, &ah_attr.sl);
+ &vlan_id, &ah_attr.sl);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1394,7 +1411,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
be16_to_cpu(tunnel->hdr.pkey_index),
be32_to_cpu(tunnel->hdr.remote_qpn),
be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, wc->smac, &tunnel->mad);
+ &ah_attr, wc->smac, vlan_id, &tunnel->mad);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index efecdf0216d8..f567160a4a56 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -335,7 +335,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
return index;
- ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid);
+ ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
if (ret)
return ret;
@@ -442,6 +442,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
+ props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = dev->dev->persist->pdev->device;
@@ -754,7 +756,7 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
if (!rdma_cap_roce_gid_table(ibdev, port))
return -ENODEV;
- ret = ib_get_cached_gid(ibdev, port, index, gid);
+ ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
if (ret == -EAGAIN) {
memcpy(gid, &zgid, sizeof(*gid));
return 0;
@@ -1247,6 +1249,22 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
return 0;
}
+static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
+ struct mlx4_ib_counters *ctr_table)
+{
+ struct counter_index *counter, *tmp_count;
+
+ mutex_lock(&ctr_table->mutex);
+ list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
+ list) {
+ if (counter->allocated)
+ mlx4_counter_free(ibdev->dev, counter->index);
+ list_del(&counter->list);
+ kfree(counter);
+ }
+ mutex_unlock(&ctr_table->mutex);
+}
+
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid)
{
@@ -2131,6 +2149,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int num_req_counters;
int allocated;
u32 counter_index;
+ struct counter_index *new_counter_index = NULL;
pr_info_once("%s", mlx4_ib_version);
@@ -2247,8 +2266,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
- ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
- ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
+ ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
@@ -2293,7 +2311,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ);
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
mlx4_ib_alloc_eqs(dev, ibdev);
@@ -2302,6 +2321,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (init_node_data(ibdev))
goto err_map;
+ for (i = 0; i < ibdev->num_ports; ++i) {
+ mutex_init(&ibdev->counters_table[i].mutex);
+ INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+ }
+
num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
for (i = 0; i < num_req_counters; ++i) {
mutex_init(&ibdev->qp1_proxy_lock[i]);
@@ -2320,15 +2344,34 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
counter_index = mlx4_get_default_counter_index(dev,
i + 1);
}
- ibdev->counters[i].index = counter_index;
- ibdev->counters[i].allocated = allocated;
+ new_counter_index = kmalloc(sizeof(*new_counter_index),
+ GFP_KERNEL);
+ if (!new_counter_index) {
+ if (allocated)
+ mlx4_counter_free(ibdev->dev, counter_index);
+ goto err_counter;
+ }
+ new_counter_index->index = counter_index;
+ new_counter_index->allocated = allocated;
+ list_add_tail(&new_counter_index->list,
+ &ibdev->counters_table[i].counters_list);
+ ibdev->counters_table[i].default_counter = counter_index;
pr_info("counter index %d for port %d allocated %d\n",
counter_index, i + 1, allocated);
}
if (mlx4_is_bonded(dev))
for (i = 1; i < ibdev->num_ports ; ++i) {
- ibdev->counters[i].index = ibdev->counters[0].index;
- ibdev->counters[i].allocated = 0;
+ new_counter_index =
+ kmalloc(sizeof(struct counter_index),
+ GFP_KERNEL);
+ if (!new_counter_index)
+ goto err_counter;
+ new_counter_index->index = counter_index;
+ new_counter_index->allocated = 0;
+ list_add_tail(&new_counter_index->list,
+ &ibdev->counters_table[i].counters_list);
+ ibdev->counters_table[i].default_counter =
+ counter_index;
}
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
@@ -2437,12 +2480,9 @@ err_steer_qp_release:
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
err_counter:
- for (i = 0; i < ibdev->num_ports; ++i) {
- if (ibdev->counters[i].index != -1 &&
- ibdev->counters[i].allocated)
- mlx4_counter_free(ibdev->dev,
- ibdev->counters[i].index);
- }
+ for (i = 0; i < ibdev->num_ports; ++i)
+ mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
+
err_map:
iounmap(ibdev->uar_map);
@@ -2546,9 +2586,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
- if (ibdev->counters[p].index != -1 &&
- ibdev->counters[p].allocated)
- mlx4_counter_free(ibdev->dev, ibdev->counters[p].index);
+ mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
+
mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
mlx4_CLOSE_PORT(dev, p);
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 2d5bccd71fc6..99451d887266 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -222,7 +222,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
spin_unlock_irqrestore(&dev->sm_lock, flags);
return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
- &ah_attr, NULL, mad);
+ &ah_attr, NULL, 0xffff, mad);
}
static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1e7b23bb2eb0..1caa11edac03 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -129,10 +129,17 @@ struct mlx4_ib_cq {
struct list_head recv_qp_list;
};
+#define MLX4_MR_PAGES_ALIGN 0x40
+
struct mlx4_ib_mr {
struct ib_mr ibmr;
+ __be64 *pages;
+ dma_addr_t page_map;
+ u32 npages;
+ u32 max_pages;
struct mlx4_mr mmr;
struct ib_umem *umem;
+ void *pages_alloc;
};
struct mlx4_ib_mw {
@@ -140,12 +147,6 @@ struct mlx4_ib_mw {
struct mlx4_mw mmw;
};
-struct mlx4_ib_fast_reg_page_list {
- struct ib_fast_reg_page_list ibfrpl;
- __be64 *mapped_page_list;
- dma_addr_t map;
-};
-
struct mlx4_ib_fmr {
struct ib_fmr ibfmr;
struct mlx4_fmr mfmr;
@@ -320,6 +321,7 @@ struct mlx4_ib_qp {
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
+ struct counter_index *counter_index;
};
struct mlx4_ib_srq {
@@ -528,10 +530,17 @@ struct mlx4_ib_iov_port {
};
struct counter_index {
+ struct list_head list;
u32 index;
u8 allocated;
};
+struct mlx4_ib_counters {
+ struct list_head counters_list;
+ struct mutex mutex; /* mutex for accessing counters list */
+ u32 default_counter;
+};
+
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@@ -550,7 +559,7 @@ struct mlx4_ib_dev {
struct mutex cap_mask_mutex;
bool ib_active;
struct mlx4_ib_iboe iboe;
- struct counter_index counters[MLX4_MAX_PORTS];
+ struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS];
int *eq_table;
struct kobject *iov_parent;
struct kobject *ports_parent;
@@ -638,11 +647,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
return container_of(ibmw, struct mlx4_ib_mw, ibmw);
}
-static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
-{
- return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
-}
-
static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -706,10 +710,9 @@ int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
- int page_list_len);
-void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
@@ -813,7 +816,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
- struct ib_mad *mad);
+ u16 vlan_id, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 2542fd3c1a49..4d1e1c632603 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -59,7 +59,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
struct mlx4_ib_mr *mr;
int err;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -140,7 +140,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int err;
int n;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -271,11 +271,59 @@ release_mpt_entry:
return err;
}
+static int
+mlx4_alloc_priv_pages(struct ib_device *device,
+ struct mlx4_ib_mr *mr,
+ int max_pages)
+{
+ int size = max_pages * sizeof(u64);
+ int add_size;
+ int ret;
+
+ add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+
+ mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL);
+ if (!mr->pages_alloc)
+ return -ENOMEM;
+
+ mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN);
+
+ mr->page_map = dma_map_single(device->dma_device, mr->pages,
+ size, DMA_TO_DEVICE);
+
+ if (dma_mapping_error(device->dma_device, mr->page_map)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ return 0;
+err:
+ kfree(mr->pages_alloc);
+
+ return ret;
+}
+
+static void
+mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
+{
+ if (mr->pages) {
+ struct ib_device *device = mr->ibmr.device;
+ int size = mr->max_pages * sizeof(u64);
+
+ dma_unmap_single(device->dma_device, mr->page_map,
+ size, DMA_TO_DEVICE);
+ kfree(mr->pages_alloc);
+ mr->pages = NULL;
+ }
+}
+
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
int ret;
+ mlx4_free_priv_pages(mr);
+
ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
if (ret)
return ret;
@@ -321,21 +369,21 @@ err_free:
int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind)
{
- struct ib_send_wr wr;
+ struct ib_bind_mw_wr wr;
struct ib_send_wr *bad_wr;
int ret;
memset(&wr, 0, sizeof(wr));
- wr.opcode = IB_WR_BIND_MW;
- wr.wr_id = mw_bind->wr_id;
- wr.send_flags = mw_bind->send_flags;
- wr.wr.bind_mw.mw = mw;
- wr.wr.bind_mw.bind_info = mw_bind->bind_info;
- wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey);
-
- ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
+ wr.wr.opcode = IB_WR_BIND_MW;
+ wr.wr.wr_id = mw_bind->wr_id;
+ wr.wr.send_flags = mw_bind->send_flags;
+ wr.mw = mw;
+ wr.bind_info = mw_bind->bind_info;
+ wr.rkey = ib_inc_rkey(mw->rkey);
+
+ ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
if (!ret)
- mw->rkey = wr.wr.bind_mw.rkey;
+ mw->rkey = wr.rkey;
return ret;
}
@@ -362,7 +410,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
max_num_sg > MLX4_MAX_FAST_REG_PAGES)
return ERR_PTR(-EINVAL);
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -371,71 +419,30 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
if (err)
goto err_free;
+ err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
+ if (err)
+ goto err_free_mr;
+
+ mr->max_pages = max_num_sg;
+
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
- goto err_mr;
+ goto err_free_pl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
mr->umem = NULL;
return &mr->ibmr;
-err_mr:
+err_free_pl:
+ mlx4_free_priv_pages(mr);
+err_free_mr:
(void) mlx4_mr_free(dev->dev, &mr->mmr);
-
err_free:
kfree(mr);
return ERR_PTR(err);
}
-struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
- int page_list_len)
-{
- struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct mlx4_ib_fast_reg_page_list *mfrpl;
- int size = page_list_len * sizeof (u64);
-
- if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
- return ERR_PTR(-EINVAL);
-
- mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
- if (!mfrpl)
- return ERR_PTR(-ENOMEM);
-
- mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
- if (!mfrpl->ibfrpl.page_list)
- goto err_free;
-
- mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist->
- pdev->dev,
- size, &mfrpl->map,
- GFP_KERNEL);
- if (!mfrpl->mapped_page_list)
- goto err_free;
-
- WARN_ON(mfrpl->map & 0x3f);
-
- return &mfrpl->ibfrpl;
-
-err_free:
- kfree(mfrpl->ibfrpl.page_list);
- kfree(mfrpl);
- return ERR_PTR(-ENOMEM);
-}
-
-void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
- struct mlx4_ib_dev *dev = to_mdev(page_list->device);
- struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
- int size = page_list->max_page_list_len * sizeof (u64);
-
- dma_free_coherent(&dev->dev->persist->pdev->dev, size,
- mfrpl->mapped_page_list,
- mfrpl->map);
- kfree(mfrpl->ibfrpl.page_list);
- kfree(mfrpl);
-}
-
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
struct ib_fmr_attr *fmr_attr)
{
@@ -528,3 +535,37 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
return err;
}
+
+static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct mlx4_ib_mr *mr = to_mmr(ibmr);
+
+ if (unlikely(mr->npages == mr->max_pages))
+ return -ENOMEM;
+
+ mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);
+
+ return 0;
+}
+
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
+{
+ struct mlx4_ib_mr *mr = to_mmr(ibmr);
+ int rc;
+
+ mr->npages = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
+ sizeof(u64) * mr->max_pages,
+ DMA_TO_DEVICE);
+
+ rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page);
+
+ ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
+ sizeof(u64) * mr->max_pages,
+ DMA_TO_DEVICE);
+
+ return rc;
+}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4ad9be3ad61c..a2e4ca56da44 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -111,7 +111,7 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
[IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
[IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
- [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
+ [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
[IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
@@ -617,6 +617,18 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
return 0;
}
+static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_qp *qp)
+{
+ mutex_lock(&dev->counters_table[qp->port - 1].mutex);
+ mlx4_counter_free(dev->dev, qp->counter_index->index);
+ list_del(&qp->counter_index->list);
+ mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
+
+ kfree(qp->counter_index);
+ qp->counter_index = NULL;
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
@@ -746,9 +758,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
} else {
qp->sq_no_prefetch = 0;
- if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
- qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
-
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
@@ -822,6 +831,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_proxy;
}
+ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
if (err)
goto err_qpn;
@@ -1086,6 +1098,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
{
struct mlx4_ib_qp *qp = NULL;
int err;
+ int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
u16 xrcdn = 0;
gfp_t gfp;
@@ -1109,8 +1122,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
}
if (init_attr->create_flags &&
- (udata ||
- ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
+ ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
+ ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
init_attr->qp_type != IB_QPT_UD) ||
((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type > IB_QPT_GSI)))
@@ -1189,6 +1204,9 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
}
+ if (mqp->counter_index)
+ mlx4_ib_free_qp_counter(dev, mqp);
+
pd = get_pd(mqp);
destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
@@ -1391,11 +1409,12 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
enum ib_qp_attr_mask qp_attr_mask,
struct mlx4_ib_qp *mqp,
- struct mlx4_qp_path *path, u8 port)
+ struct mlx4_qp_path *path, u8 port,
+ u16 vlan_id, u8 *smac)
{
return _mlx4_set_path(dev, &qp->ah_attr,
- mlx4_mac_to_u64((u8 *)qp->smac),
- (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
+ mlx4_mac_to_u64(smac),
+ vlan_id,
path, &mqp->pri, port);
}
@@ -1406,9 +1425,8 @@ static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
struct mlx4_qp_path *path, u8 port)
{
return _mlx4_set_path(dev, &qp->alt_ah_attr,
- mlx4_mac_to_u64((u8 *)qp->alt_smac),
- (qp_attr_mask & IB_QP_ALT_VID) ?
- qp->alt_vlan_id : 0xffff,
+ 0,
+ 0xffff,
path, &mqp->alt, port);
}
@@ -1424,7 +1442,8 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
-static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_qp *qp,
struct mlx4_qp_context *context)
{
u64 u64_mac;
@@ -1447,6 +1466,40 @@ static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *
return 0;
}
+static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ struct counter_index *new_counter_index;
+ int err;
+ u32 tmp_idx;
+
+ if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) !=
+ IB_LINK_LAYER_ETHERNET ||
+ !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) ||
+ !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
+ return 0;
+
+ err = mlx4_counter_alloc(dev->dev, &tmp_idx);
+ if (err)
+ return err;
+
+ new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL);
+ if (!new_counter_index) {
+ mlx4_counter_free(dev->dev, tmp_idx);
+ return -ENOMEM;
+ }
+
+ new_counter_index->index = tmp_idx;
+ new_counter_index->allocated = 1;
+ qp->counter_index = new_counter_index;
+
+ mutex_lock(&dev->counters_table[qp->port - 1].mutex);
+ list_add_tail(&new_counter_index->list,
+ &dev->counters_table[qp->port - 1].counters_list);
+ mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
+
+ return 0;
+}
+
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1460,6 +1513,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
int sqd_event;
int steer_qp = 0;
int err = -EINVAL;
+ int counter_index;
/* APM is not supported under RoCE */
if (attr_mask & IB_QP_ALT_PATH &&
@@ -1519,6 +1573,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
context->sq_size_stride |= qp->sq.wqe_shift - 4;
+ if (new_state == IB_QPS_RESET && qp->counter_index)
+ mlx4_ib_free_qp_counter(dev, qp);
+
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
context->xrcd = cpu_to_be32((u32) qp->xrcdn);
@@ -1543,10 +1600,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
- if (dev->counters[qp->port - 1].index != -1) {
- context->pri_path.counter_index =
- dev->counters[qp->port - 1].index;
+ err = create_qp_lb_counter(dev, qp);
+ if (err)
+ goto out;
+
+ counter_index =
+ dev->counters_table[qp->port - 1].default_counter;
+ if (qp->counter_index)
+ counter_index = qp->counter_index->index;
+
+ if (counter_index != -1) {
+ context->pri_path.counter_index = counter_index;
optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+ if (qp->counter_index) {
+ context->pri_path.fl |=
+ MLX4_FL_ETH_SRC_CHECK_MC_LB;
+ context->pri_path.vlan_control |=
+ MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+ }
} else
context->pri_path.counter_index =
MLX4_SINK_COUNTER_INDEX(dev->dev);
@@ -1565,9 +1636,33 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_AV) {
+ u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
+ attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr;
+ u16 vlan = 0xffff;
+ u8 smac[ETH_ALEN];
+ int status = 0;
+
+ if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+ attr->ah_attr.ah_flags & IB_AH_GRH) {
+ int index = attr->ah_attr.grh.sgid_index;
+
+ status = ib_get_cached_gid(ibqp->device, port_num,
+ index, &gid, &gid_attr);
+ if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
+ status = -ENOENT;
+ if (!status && gid_attr.ndev) {
+ vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
+ memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
+ dev_put(gid_attr.ndev);
+ }
+ }
+ if (status)
+ goto out;
+
if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
- attr_mask & IB_QP_PORT ?
- attr->port_num : qp->port))
+ port_num, vlan, smac))
goto out;
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@@ -1704,7 +1799,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
- err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context);
+ err = handle_eth_ud_smac_index(dev, qp, context);
if (err) {
err = -EINVAL;
goto out;
@@ -1848,6 +1943,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
out:
+ if (err && qp->counter_index)
+ mlx4_ib_free_qp_counter(dev, qp);
if (err && steer_qp)
mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
@@ -2036,14 +2133,14 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
}
static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
- struct ib_send_wr *wr,
+ struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
struct ib_device *ib_dev = &mdev->ib_dev;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
- struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ struct mlx4_ib_ah *ah = to_mah(wr->ah);
u16 pkey;
u32 qkey;
int send_size;
@@ -2051,13 +2148,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
int spc;
int i;
- if (wr->opcode != IB_WR_SEND)
+ if (wr->wr.opcode != IB_WR_SEND)
return -EINVAL;
send_size = 0;
- for (i = 0; i < wr->num_sge; ++i)
- send_size += wr->sg_list[i].length;
+ for (i = 0; i < wr->wr.num_sge; ++i)
+ send_size += wr->wr.sg_list[i].length;
/* for proxy-qp0 sends, need to add in size of tunnel header */
/* for tunnel-qp0 sends, tunnel header is already in s/g list */
@@ -2082,11 +2179,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
mlx->rlid = sqp->ud_header.lrh.destination_lid;
sqp->ud_header.lrh.virtual_lane = 0;
- sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
- sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
else
sqp->ud_header.bth.destination_qpn =
cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
@@ -2158,14 +2255,14 @@ static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
}
}
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
+static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
- struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ struct mlx4_ib_ah *ah = to_mah(wr->ah);
union ib_gid sgid;
u16 pkey;
int send_size;
@@ -2179,8 +2276,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
bool is_grh;
send_size = 0;
- for (i = 0; i < wr->num_sge; ++i)
- send_size += wr->sg_list[i].length;
+ for (i = 0; i < wr->wr.num_sge; ++i)
+ send_size += wr->wr.sg_list[i].length;
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
@@ -2197,7 +2294,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
} else {
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sgid);
+ ah->av.ib.gid_index, &sgid,
+ NULL);
+ if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
+ err = -ENOENT;
if (err)
return err;
}
@@ -2239,7 +2339,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index,
- &sqp->ud_header.grh.source_gid);
+ &sqp->ud_header.grh.source_gid, NULL);
}
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
@@ -2257,7 +2357,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
mlx->rlid = sqp->ud_header.lrh.destination_lid;
}
- switch (wr->opcode) {
+ switch (wr->wr.opcode) {
case IB_WR_SEND:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
@@ -2265,7 +2365,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
- sqp->ud_header.immediate_data = wr->ex.imm_data;
+ sqp->ud_header.immediate_data = wr->wr.ex.imm_data;
break;
default:
return -EINVAL;
@@ -2308,16 +2408,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
}
- sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
if (!sqp->qp.ibqp.qp_num)
ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
else
- ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
+ ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
- sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
- sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
- sqp->qkey : wr->wr.ud.remote_qkey);
+ sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
+ sqp->qkey : wr->remote_qkey);
sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
@@ -2405,43 +2505,39 @@ static __be32 convert_access(int acc)
cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
}
-static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
+ struct ib_reg_wr *wr)
{
- struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
- int i;
-
- for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
- mfrpl->mapped_page_list[i] =
- cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
- MLX4_MTT_FLAG_PRESENT);
+ struct mlx4_ib_mr *mr = to_mmr(wr->mr);
- fseg->flags = convert_access(wr->wr.fast_reg.access_flags);
- fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey);
- fseg->buf_list = cpu_to_be64(mfrpl->map);
- fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
- fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length);
+ fseg->flags = convert_access(wr->access);
+ fseg->mem_key = cpu_to_be32(wr->key);
+ fseg->buf_list = cpu_to_be64(mr->page_map);
+ fseg->start_addr = cpu_to_be64(mr->ibmr.iova);
+ fseg->reg_len = cpu_to_be64(mr->ibmr.length);
fseg->offset = 0; /* XXX -- is this just for ZBVA? */
- fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift);
+ fseg->page_size = cpu_to_be32(ilog2(mr->ibmr.page_size));
fseg->reserved[0] = 0;
fseg->reserved[1] = 0;
}
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
+static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
+ struct ib_bind_mw_wr *wr)
{
bseg->flags1 =
- convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
+ convert_access(wr->bind_info.mw_access_flags) &
cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
bseg->flags2 = 0;
- if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
+ if (wr->mw->type == IB_MW_TYPE_2)
bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
- if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
+ if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
- bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
- bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
- bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
- bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
+ bseg->new_rkey = cpu_to_be32(wr->rkey);
+ bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
+ bseg->addr = cpu_to_be64(wr->bind_info.addr);
+ bseg->length = cpu_to_be64(wr->bind_info.length);
}
static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
@@ -2458,46 +2554,47 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
rseg->reserved = 0;
}
-static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
+ struct ib_atomic_wr *wr)
{
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
- } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+ if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->swap);
+ aseg->compare = cpu_to_be64(wr->compare_add);
+ } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+ aseg->swap_add = cpu_to_be64(wr->compare_add);
+ aseg->compare = cpu_to_be64(wr->compare_add_mask);
} else {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->swap_add = cpu_to_be64(wr->compare_add);
aseg->compare = 0;
}
}
static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
- struct ib_send_wr *wr)
+ struct ib_atomic_wr *wr)
{
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+ aseg->swap_add = cpu_to_be64(wr->swap);
+ aseg->swap_add_mask = cpu_to_be64(wr->swap_mask);
+ aseg->compare = cpu_to_be64(wr->compare_add);
+ aseg->compare_mask = cpu_to_be64(wr->compare_add_mask);
}
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr)
+ struct ib_ud_wr *wr)
{
- memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
- dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
- dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
- dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
- memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
+ memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
+ dseg->dqpn = cpu_to_be32(wr->remote_qpn);
+ dseg->qkey = cpu_to_be32(wr->remote_qkey);
+ dseg->vlan = to_mah(wr->ah)->av.eth.vlan;
+ memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6);
}
static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
struct mlx4_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr,
+ struct ib_ud_wr *wr,
enum mlx4_ib_qp_type qpt)
{
- union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
+ union mlx4_ext_av *av = &to_mah(wr->ah)->av;
struct mlx4_av sqp_av = {0};
int port = *((u8 *) &av->ib.port_pd) & 0x3;
@@ -2516,18 +2613,18 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
}
-static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
+static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len)
{
struct mlx4_wqe_inline_seg *inl = wqe;
struct mlx4_ib_tunnel_header hdr;
- struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ struct mlx4_ib_ah *ah = to_mah(wr->ah);
int spc;
int i;
memcpy(&hdr.av, &ah->av, sizeof hdr.av);
- hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
- hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
- hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ hdr.remote_qpn = cpu_to_be32(wr->remote_qpn);
+ hdr.pkey_index = cpu_to_be16(wr->pkey_index);
+ hdr.qkey = cpu_to_be32(wr->remote_qkey);
memcpy(hdr.mac, ah->av.eth.mac, 6);
hdr.vlan = ah->av.eth.vlan;
@@ -2599,22 +2696,22 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
-static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
__be32 *lso_hdr_sz, __be32 *blh)
{
- unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
+ unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
*blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
- wr->num_sge > qp->sq.max_gs - (halign >> 4)))
+ wr->wr.num_sge > qp->sq.max_gs - (halign >> 4)))
return -EINVAL;
- memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
+ memcpy(wqe->header, wr->header, wr->hlen);
- *lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
+ *lso_hdr_sz = cpu_to_be32(wr->mss << 16 | wr->hlen);
*lso_seg_len = halign;
return 0;
}
@@ -2713,11 +2810,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
- set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
- set_atomic_seg(wqe, wr);
+ set_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mlx4_wqe_atomic_seg);
size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2726,11 +2823,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
- set_masked_atomic_seg(wqe, wr);
+ set_masked_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2741,8 +2838,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
break;
@@ -2755,18 +2852,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
ctrl->srcrb_flags |=
cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
- set_fmr_seg(wqe, wr);
- wqe += sizeof (struct mlx4_wqe_fmr_seg);
- size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+ set_reg_seg(wqe, reg_wr(wr));
+ wqe += sizeof(struct mlx4_wqe_fmr_seg);
+ size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
break;
case IB_WR_BIND_MW:
ctrl->srcrb_flags |=
cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
- set_bind_seg(wqe, wr);
+ set_bind_seg(wqe, bind_mw_wr(wr));
wqe += sizeof(struct mlx4_wqe_bind_seg);
size += sizeof(struct mlx4_wqe_bind_seg) / 16;
break;
@@ -2777,7 +2874,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_TUN_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+ err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
+ ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -2788,19 +2886,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case MLX4_IB_QPT_TUN_SMI:
case MLX4_IB_QPT_TUN_GSI:
/* this is a UD qp used in MAD responses to slaves. */
- set_datagram_seg(wqe, wr);
+ set_datagram_seg(wqe, ud_wr(wr));
/* set the forced-loopback bit in the data seg av */
*(__be32 *) wqe |= cpu_to_be32(0x80000000);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
break;
case MLX4_IB_QPT_UD:
- set_datagram_seg(wqe, wr);
+ set_datagram_seg(wqe, ud_wr(wr));
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
- err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
+ err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen,
+ &lso_hdr_sz, &blh);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -2812,7 +2911,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+ err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
+ ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -2823,7 +2923,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
add_zero_len_inline(wqe);
wqe += 16;
size++;
- build_tunnel_header(wr, wqe, &seglen);
+ build_tunnel_header(ud_wr(wr), wqe, &seglen);
wqe += seglen;
size += seglen / 16;
break;
@@ -2833,18 +2933,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
* In this case we first add a UD segment targeting
* the tunnel qp, and then add a header with address
* information */
- set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe,
+ ud_wr(wr),
qp->mlx4_ib_qp_type);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
- build_tunnel_header(wr, wqe, &seglen);
+ build_tunnel_header(ud_wr(wr), wqe, &seglen);
wqe += seglen;
size += seglen / 16;
break;
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
+ err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 2d0dbbf38ceb..3dfd287256d6 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -109,8 +109,8 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
case IB_WR_LOCAL_INV:
return IB_WC_LOCAL_INV;
- case IB_WR_FAST_REG_MR:
- return IB_WC_FAST_REG_MR;
+ case IB_WR_REG_MR:
+ return IB_WC_REG_MR;
default:
pr_warn("unknown completion status\n");
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 68508d528ba0..7e97cb55a6bf 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1425,8 +1425,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
- dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
- dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
+ dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 22123b79d550..633347260b79 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -245,6 +245,7 @@ enum mlx5_ib_qp_flags {
};
struct mlx5_umr_wr {
+ struct ib_send_wr wr;
union {
u64 virt_addr;
u64 offset;
@@ -257,6 +258,11 @@ struct mlx5_umr_wr {
u32 mkey;
};
+static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct mlx5_umr_wr, wr);
+}
+
struct mlx5_shared_mr_info {
int mr_id;
struct ib_umem *umem;
@@ -313,6 +319,11 @@ enum mlx5_ib_mtt_access_flags {
struct mlx5_ib_mr {
struct ib_mr ibmr;
+ void *descs;
+ dma_addr_t desc_map;
+ int ndescs;
+ int max_descs;
+ int desc_size;
struct mlx5_core_mr mmr;
struct ib_umem *umem;
struct mlx5_shared_mr_info *smr_info;
@@ -324,12 +335,7 @@ struct mlx5_ib_mr {
struct mlx5_create_mkey_mbox_out out;
struct mlx5_core_sig_ctx *sig;
int live;
-};
-
-struct mlx5_ib_fast_reg_page_list {
- struct ib_fast_reg_page_list ibfrpl;
- __be64 *mapped_page_list;
- dma_addr_t map;
+ void *descs_alloc;
};
struct mlx5_ib_umr_context {
@@ -358,20 +364,6 @@ enum {
MLX5_FMR_BUSY,
};
-struct mlx5_ib_fmr {
- struct ib_fmr ibfmr;
- struct mlx5_core_mr mr;
- int access_flags;
- int state;
- /* protect fmr state
- */
- spinlock_t lock;
- u64 wrid;
- struct ib_send_wr wr[2];
- u8 page_shift;
- struct ib_fast_reg_page_list page_list;
-};
-
struct mlx5_cache_ent {
struct list_head head;
/* sync access to the cahce entry
@@ -456,11 +448,6 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
}
-static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
-}
-
static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct mlx5_ib_cq, ibcq);
@@ -501,11 +488,6 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
return container_of(ibmr, struct mlx5_ib_mr, ibmr);
}
-static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
-{
- return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
-}
-
struct mlx5_ib_ah {
struct ib_ah ibah;
struct mlx5_av av;
@@ -573,15 +555,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
- int page_list_len);
-void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
- struct ib_fmr_attr *fmr_attr);
-int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int npages, u64 iova);
-int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
-int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 54a15b5d336d..ec8993a7b3be 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -687,7 +687,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+ struct mlx5_umr_wr *umrwr = umr_wr(wr);
sg->addr = dma;
sg->length = ALIGN(sizeof(u64) * n, 64);
@@ -715,7 +715,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
struct ib_send_wr *wr, u32 key)
{
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+ struct mlx5_umr_wr *umrwr = umr_wr(wr);
wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
wr->opcode = MLX5_IB_WR_UMR;
@@ -752,7 +752,8 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
struct device *ddev = dev->ib_dev.dma_device;
struct umr_common *umrc = &dev->umrc;
struct mlx5_ib_umr_context umr_context;
- struct ib_send_wr wr, *bad;
+ struct mlx5_umr_wr umrwr;
+ struct ib_send_wr *bad;
struct mlx5_ib_mr *mr;
struct ib_sge sg;
int size;
@@ -798,14 +799,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
goto free_pas;
}
- memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)&umr_context;
- prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
- virt_addr, len, access_flags);
+ memset(&umrwr, 0, sizeof(umrwr));
+ umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
+ prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
+ page_shift, virt_addr, len, access_flags);
mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr, &bad);
+ err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
if (err) {
mlx5_ib_warn(dev, "post send failed, err %d\n", err);
goto unmap_dma;
@@ -851,8 +852,8 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
int size;
__be64 *pas;
dma_addr_t dma;
- struct ib_send_wr wr, *bad;
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+ struct ib_send_wr *bad;
+ struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
@@ -917,26 +918,26 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)&umr_context;
+ wr.wr.wr_id = (u64)(unsigned long)&umr_context;
sg.addr = dma;
sg.length = ALIGN(npages * sizeof(u64),
MLX5_UMR_MTT_ALIGNMENT);
sg.lkey = dev->umrc.pd->local_dma_lkey;
- wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+ wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
MLX5_IB_SEND_UMR_UPDATE_MTT;
- wr.sg_list = &sg;
- wr.num_sge = 1;
- wr.opcode = MLX5_IB_WR_UMR;
- umrwr->npages = sg.length / sizeof(u64);
- umrwr->page_shift = PAGE_SHIFT;
- umrwr->mkey = mr->mmr.key;
- umrwr->target.offset = start_page_index;
+ wr.wr.sg_list = &sg;
+ wr.wr.num_sge = 1;
+ wr.wr.opcode = MLX5_IB_WR_UMR;
+ wr.npages = sg.length / sizeof(u64);
+ wr.page_shift = PAGE_SHIFT;
+ wr.mkey = mr->mmr.key;
+ wr.target.offset = start_page_index;
mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr, &bad);
+ err = ib_post_send(umrc->qp, &wr.wr, &bad);
if (err) {
mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
} else {
@@ -1122,16 +1123,17 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct umr_common *umrc = &dev->umrc;
struct mlx5_ib_umr_context umr_context;
- struct ib_send_wr wr, *bad;
+ struct mlx5_umr_wr umrwr;
+ struct ib_send_wr *bad;
int err;
- memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)&umr_context;
- prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
+ memset(&umrwr.wr, 0, sizeof(umrwr));
+ umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
+ prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);
mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr, &bad);
+ err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
if (err) {
up(&umrc->sem);
mlx5_ib_dbg(dev, "err %d\n", err);
@@ -1151,6 +1153,52 @@ error:
return err;
}
+static int
+mlx5_alloc_priv_descs(struct ib_device *device,
+ struct mlx5_ib_mr *mr,
+ int ndescs,
+ int desc_size)
+{
+ int size = ndescs * desc_size;
+ int add_size;
+ int ret;
+
+ add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+
+ mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
+ if (!mr->descs_alloc)
+ return -ENOMEM;
+
+ mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
+
+ mr->desc_map = dma_map_single(device->dma_device, mr->descs,
+ size, DMA_TO_DEVICE);
+ if (dma_mapping_error(device->dma_device, mr->desc_map)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ return 0;
+err:
+ kfree(mr->descs_alloc);
+
+ return ret;
+}
+
+static void
+mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
+{
+ if (mr->descs) {
+ struct ib_device *device = mr->ibmr.device;
+ int size = mr->max_descs * mr->desc_size;
+
+ dma_unmap_single(device->dma_device, mr->desc_map,
+ size, DMA_TO_DEVICE);
+ kfree(mr->descs_alloc);
+ mr->descs = NULL;
+ }
+}
+
static int clean_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
@@ -1170,6 +1218,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
mr->sig = NULL;
}
+ mlx5_free_priv_descs(mr);
+
if (!umred) {
err = destroy_mkey(dev, mr);
if (err) {
@@ -1259,6 +1309,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
if (mr_type == IB_MR_TYPE_MEM_REG) {
access_mode = MLX5_ACCESS_MODE_MTT;
in->seg.log2_page_size = PAGE_SHIFT;
+
+ err = mlx5_alloc_priv_descs(pd->device, mr,
+ ndescs, sizeof(u64));
+ if (err)
+ goto err_free_in;
+
+ mr->desc_size = sizeof(u64);
+ mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
u32 psv_index[2];
@@ -1315,6 +1373,7 @@ err_destroy_psv:
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
}
+ mlx5_free_priv_descs(mr);
err_free_sig:
kfree(mr->sig);
err_free_in:
@@ -1324,48 +1383,6 @@ err_free:
return ERR_PTR(err);
}
-struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
- int page_list_len)
-{
- struct mlx5_ib_fast_reg_page_list *mfrpl;
- int size = page_list_len * sizeof(u64);
-
- mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
- if (!mfrpl)
- return ERR_PTR(-ENOMEM);
-
- mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
- if (!mfrpl->ibfrpl.page_list)
- goto err_free;
-
- mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
- size, &mfrpl->map,
- GFP_KERNEL);
- if (!mfrpl->mapped_page_list)
- goto err_free;
-
- WARN_ON(mfrpl->map & 0x3f);
-
- return &mfrpl->ibfrpl;
-
-err_free:
- kfree(mfrpl->ibfrpl.page_list);
- kfree(mfrpl);
- return ERR_PTR(-ENOMEM);
-}
-
-void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
- struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
- struct mlx5_ib_dev *dev = to_mdev(page_list->device);
- int size = page_list->max_page_list_len * sizeof(u64);
-
- dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
- mfrpl->map);
- kfree(mfrpl->ibfrpl.page_list);
- kfree(mfrpl);
-}
-
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status)
{
@@ -1406,3 +1423,39 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
done:
return ret;
}
+
+static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ __be64 *descs;
+
+ if (unlikely(mr->ndescs == mr->max_descs))
+ return -ENOMEM;
+
+ descs = mr->descs;
+ descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+
+ return 0;
+}
+
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ int n;
+
+ mr->ndescs = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
+ mr->desc_size * mr->max_descs,
+ DMA_TO_DEVICE);
+
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
+
+ ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
+ mr->desc_size * mr->max_descs,
+ DMA_TO_DEVICE);
+
+ return n;
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6f521a3418e8..307bdbca8938 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -64,7 +64,7 @@ static const u32 mlx5_ib_opcode[] = {
[IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
[IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
[IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
- [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR,
+ [IB_WR_REG_MR] = MLX5_OPCODE_UMR,
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
[MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
@@ -1838,9 +1838,9 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
struct ib_send_wr *wr)
{
- memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
- dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
- dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
+ dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
+ dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
}
static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
@@ -1896,22 +1896,24 @@ static __be64 sig_mkey_mask(void)
return cpu_to_be64(result);
}
-static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct ib_send_wr *wr, int li)
+static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct mlx5_ib_mr *mr)
{
- memset(umr, 0, sizeof(*umr));
-
- if (li) {
- umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
- umr->flags = 1 << 7;
- return;
- }
+ int ndescs = mr->ndescs;
- umr->flags = (1 << 5); /* fail if not free */
- umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
+ memset(umr, 0, sizeof(*umr));
+ umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+ umr->klm_octowords = get_klm_octo(ndescs);
umr->mkey_mask = frwr_mkey_mask();
}
+static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
+{
+ memset(umr, 0, sizeof(*umr));
+ umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+ umr->flags = 1 << 7;
+}
+
static __be64 get_umr_reg_mr_mask(void)
{
u64 result;
@@ -1952,7 +1954,7 @@ static __be64 get_umr_update_mtt_mask(void)
static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
struct ib_send_wr *wr)
{
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+ struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(umr, 0, sizeof(*umr));
@@ -1987,29 +1989,31 @@ static u8 get_umr_flags(int acc)
MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
}
-static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
- int li, int *writ)
+static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
+ struct mlx5_ib_mr *mr,
+ u32 key, int access)
{
- memset(seg, 0, sizeof(*seg));
- if (li) {
- seg->status = MLX5_MKEY_STATUS_FREE;
- return;
- }
+ int ndescs = ALIGN(mr->ndescs, 8) >> 1;
- seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
- MLX5_ACCESS_MODE_MTT;
- *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
- seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
+ memset(seg, 0, sizeof(*seg));
+ seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT;
+ seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
- seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
- seg->len = cpu_to_be64(wr->wr.fast_reg.length);
- seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
- seg->log2_page_size = wr->wr.fast_reg.page_shift;
+ seg->start_addr = cpu_to_be64(mr->ibmr.iova);
+ seg->len = cpu_to_be64(mr->ibmr.length);
+ seg->xlt_oct_size = cpu_to_be32(ndescs);
+ seg->log2_page_size = ilog2(mr->ibmr.page_size);
+}
+
+static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
+{
+ memset(seg, 0, sizeof(*seg));
+ seg->status = MLX5_MKEY_STATUS_FREE;
}
static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
{
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+ struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(seg, 0, sizeof(*seg));
if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
@@ -2028,21 +2032,14 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
mlx5_mkey_variant(umrwr->mkey));
}
-static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
- struct ib_send_wr *wr,
- struct mlx5_core_dev *mdev,
- struct mlx5_ib_pd *pd,
- int writ)
+static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
+ struct mlx5_ib_mr *mr,
+ struct mlx5_ib_pd *pd)
{
- struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
- u64 *page_list = wr->wr.fast_reg.page_list->page_list;
- u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
- int i;
+ int bcount = mr->desc_size * mr->ndescs;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
- mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
- dseg->addr = cpu_to_be64(mfrpl->map);
- dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
+ dseg->addr = cpu_to_be64(mr->desc_map);
+ dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
}
@@ -2224,22 +2221,22 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
return 0;
}
-static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
- void **seg, int *size)
+static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
+ struct mlx5_ib_qp *qp, void **seg, int *size)
{
- struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs;
- struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
+ struct ib_mr *sig_mr = wr->sig_mr;
struct mlx5_bsf *bsf;
- u32 data_len = wr->sg_list->length;
- u32 data_key = wr->sg_list->lkey;
- u64 data_va = wr->sg_list->addr;
+ u32 data_len = wr->wr.sg_list->length;
+ u32 data_key = wr->wr.sg_list->lkey;
+ u64 data_va = wr->wr.sg_list->addr;
int ret;
int wqe_size;
- if (!wr->wr.sig_handover.prot ||
- (data_key == wr->wr.sig_handover.prot->lkey &&
- data_va == wr->wr.sig_handover.prot->addr &&
- data_len == wr->wr.sig_handover.prot->length)) {
+ if (!wr->prot ||
+ (data_key == wr->prot->lkey &&
+ data_va == wr->prot->addr &&
+ data_len == wr->prot->length)) {
/**
* Source domain doesn't contain signature information
* or data and protection are interleaved in memory.
@@ -2273,8 +2270,8 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
struct mlx5_stride_block_entry *data_sentry;
struct mlx5_stride_block_entry *prot_sentry;
- u32 prot_key = wr->wr.sig_handover.prot->lkey;
- u64 prot_va = wr->wr.sig_handover.prot->addr;
+ u32 prot_key = wr->prot->lkey;
+ u64 prot_va = wr->prot->addr;
u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
int prot_size;
@@ -2326,16 +2323,16 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
}
static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- struct ib_send_wr *wr, u32 nelements,
+ struct ib_sig_handover_wr *wr, u32 nelements,
u32 length, u32 pdn)
{
- struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ struct ib_mr *sig_mr = wr->sig_mr;
u32 sig_key = sig_mr->rkey;
u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
memset(seg, 0, sizeof(*seg));
- seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
+ seg->flags = get_umr_flags(wr->access_flags) |
MLX5_ACCESS_MODE_KLM;
seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
@@ -2346,7 +2343,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
}
static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct ib_send_wr *wr, u32 nelements)
+ u32 nelements)
{
memset(umr, 0, sizeof(*umr));
@@ -2357,37 +2354,37 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
}
-static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
void **seg, int *size)
{
- struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr);
+ struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
u32 pdn = get_pd(qp)->pdn;
u32 klm_oct_size;
int region_len, ret;
- if (unlikely(wr->num_sge != 1) ||
- unlikely(wr->wr.sig_handover.access_flags &
- IB_ACCESS_REMOTE_ATOMIC) ||
+ if (unlikely(wr->wr.num_sge != 1) ||
+ unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
unlikely(!sig_mr->sig->sig_status_checked))
return -EINVAL;
/* length of the protected region, data + protection */
- region_len = wr->sg_list->length;
- if (wr->wr.sig_handover.prot &&
- (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey ||
- wr->wr.sig_handover.prot->addr != wr->sg_list->addr ||
- wr->wr.sig_handover.prot->length != wr->sg_list->length))
- region_len += wr->wr.sig_handover.prot->length;
+ region_len = wr->wr.sg_list->length;
+ if (wr->prot &&
+ (wr->prot->lkey != wr->wr.sg_list->lkey ||
+ wr->prot->addr != wr->wr.sg_list->addr ||
+ wr->prot->length != wr->wr.sg_list->length))
+ region_len += wr->prot->length;
/**
* KLM octoword size - if protection was provided
* then we use strided block format (3 octowords),
* else we use single KLM (1 octoword)
**/
- klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1;
+ klm_oct_size = wr->prot ? 3 : 1;
- set_sig_umr_segment(*seg, wr, klm_oct_size);
+ set_sig_umr_segment(*seg, klm_oct_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
@@ -2433,38 +2430,52 @@ static int set_psv_wr(struct ib_sig_domain *domain,
return 0;
}
-static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
- struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
+static int set_reg_wr(struct mlx5_ib_qp *qp,
+ struct ib_reg_wr *wr,
+ void **seg, int *size)
{
- int writ = 0;
- int li;
+ struct mlx5_ib_mr *mr = to_mmr(wr->mr);
+ struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
- if (unlikely(wr->send_flags & IB_SEND_INLINE))
+ if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
+ mlx5_ib_warn(to_mdev(qp->ibqp.device),
+ "Invalid IB_SEND_INLINE send flag\n");
return -EINVAL;
+ }
- set_frwr_umr_segment(*seg, wr, li);
+ set_reg_umr_seg(*seg, mr);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
- set_mkey_segment(*seg, wr, li, &writ);
+
+ set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
- if (!li) {
- if (unlikely(wr->wr.fast_reg.page_list_len >
- wr->wr.fast_reg.page_list->max_page_list_len))
- return -ENOMEM;
- set_frwr_pages(*seg, wr, mdev, pd, writ);
- *seg += sizeof(struct mlx5_wqe_data_seg);
- *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
- }
+ set_reg_data_seg(*seg, mr, pd);
+ *seg += sizeof(struct mlx5_wqe_data_seg);
+ *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+
return 0;
}
+static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
+{
+ set_linv_umr_seg(*seg);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+ set_linv_mkey_seg(*seg);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+}
+
static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
{
__be32 *p = NULL;
@@ -2578,7 +2589,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_mr *mr;
struct mlx5_wqe_data_seg *dpseg;
@@ -2627,7 +2637,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (ibqp->qp_type) {
case IB_QPT_XRC_INI:
xrc = seg;
- xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
seg += sizeof(*xrc);
size += sizeof(*xrc) / 16;
/* fall through */
@@ -2636,8 +2645,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
seg += sizeof(struct mlx5_wqe_raddr_seg);
size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
break;
@@ -2654,22 +2663,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
- err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
+ set_linv_wr(qp, &seg, &size);
num_sge = 0;
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR;
- ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
- err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
+ qp->sq.wr_data[idx] = IB_WR_REG_MR;
+ ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
+ err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
if (err) {
- mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
@@ -2678,7 +2681,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_REG_SIG_MR:
qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
- mr = to_mmr(wr->wr.sig_handover.sig_mr);
+ mr = to_mmr(sig_handover_wr(wr)->sig_mr);
ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
err = set_sig_umr_wr(wr, qp, &seg, &size);
@@ -2706,7 +2709,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem,
+ err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
mr->sig->psv_memory.psv_idx, &seg,
&size);
if (err) {
@@ -2728,7 +2731,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire,
+ err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
mr->sig->psv_wire.psv_idx, &seg,
&size);
if (err) {
@@ -2752,8 +2755,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
seg += sizeof(struct mlx5_wqe_raddr_seg);
size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
break;
@@ -2780,7 +2783,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
- ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
+ ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
set_reg_umr_segment(seg, wr);
seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 32f6c6315454..bcac294042f5 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -281,7 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
ib_get_cached_gid(&dev->ib_dev,
be32_to_cpu(ah->av->port_pd) >> 24,
ah->av->gid_index % dev->limits.gid_table_len,
- &header->grh.source_gid);
+ &header->grh.source_gid, NULL);
memcpy(header->grh.destination_gid.raw,
ah->av->dgid, 16);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index e354b2f04ad9..35fe506e2cfa 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1476,7 +1476,7 @@ void mthca_free_qp(struct mthca_dev *dev,
/* Create UD header for an MLX send and build a data segment for it */
static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
- int ind, struct ib_send_wr *wr,
+ int ind, struct ib_ud_wr *wr,
struct mthca_mlx_seg *mlx,
struct mthca_data_seg *data)
{
@@ -1485,10 +1485,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
u16 pkey;
ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
- mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0,
+ mthca_ah_grh_present(to_mah(wr->ah)), 0,
&sqp->ud_header);
- err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
+ err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
if (err)
return err;
mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
@@ -1499,7 +1499,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
mlx->rlid = sqp->ud_header.lrh.destination_lid;
mlx->vcrc = 0;
- switch (wr->opcode) {
+ switch (wr->wr.opcode) {
case IB_WR_SEND:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
@@ -1507,7 +1507,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
- sqp->ud_header.immediate_data = wr->ex.imm_data;
+ sqp->ud_header.immediate_data = wr->wr.ex.imm_data;
break;
default:
return -EINVAL;
@@ -1516,18 +1516,18 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
- sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
if (!sqp->qp.ibqp.qp_num)
ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
sqp->pkey_index, &pkey);
else
ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
- wr->wr.ud.pkey_index, &pkey);
+ wr->pkey_index, &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
- sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
- sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
- sqp->qkey : wr->wr.ud.remote_qkey);
+ sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
+ sqp->qkey : wr->remote_qkey);
sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header,
@@ -1569,34 +1569,34 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
}
static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
- struct ib_send_wr *wr)
+ struct ib_atomic_wr *wr)
{
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->swap);
+ aseg->compare = cpu_to_be64(wr->compare_add);
} else {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->swap_add = cpu_to_be64(wr->compare_add);
aseg->compare = 0;
}
}
static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
- struct ib_send_wr *wr)
+ struct ib_ud_wr *wr)
{
- useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
- useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
- useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
- useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ useg->lkey = cpu_to_be32(to_mah(wr->ah)->key);
+ useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma);
+ useg->dqpn = cpu_to_be32(wr->remote_qpn);
+ useg->qkey = cpu_to_be32(wr->remote_qkey);
}
static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
- struct ib_send_wr *wr)
+ struct ib_ud_wr *wr)
{
- memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
- useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
- useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE);
+ useg->dqpn = cpu_to_be32(wr->remote_qpn);
+ useg->qkey = cpu_to_be32(wr->remote_qkey);
}
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
@@ -1664,11 +1664,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
- set_atomic_seg(wqe, wr);
+ set_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mthca_atomic_seg);
size += (sizeof (struct mthca_raddr_seg) +
sizeof (struct mthca_atomic_seg)) / 16;
@@ -1677,8 +1677,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
case IB_WR_RDMA_READ:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -1694,8 +1694,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -1708,13 +1708,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case UD:
- set_tavor_ud_seg(wqe, wr);
+ set_tavor_ud_seg(wqe, ud_wr(wr));
wqe += sizeof (struct mthca_tavor_ud_seg);
size += sizeof (struct mthca_tavor_ud_seg) / 16;
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+ err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
wqe - sizeof (struct mthca_next_seg),
wqe);
if (err) {
@@ -2005,11 +2005,11 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
- set_atomic_seg(wqe, wr);
+ set_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mthca_atomic_seg);
size += (sizeof (struct mthca_raddr_seg) +
sizeof (struct mthca_atomic_seg)) / 16;
@@ -2018,8 +2018,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -2035,8 +2035,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -2049,13 +2049,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case UD:
- set_arbel_ud_seg(wqe, wr);
+ set_arbel_ud_seg(wqe, ud_wr(wr));
wqe += sizeof (struct mthca_arbel_ud_seg);
size += sizeof (struct mthca_arbel_ud_seg) / 16;
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+ err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
wqe - sizeof (struct mthca_next_seg),
wqe);
if (err) {
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index d748e4b31b8d..c9080208aad2 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1200,12 +1200,6 @@ struct nes_fast_mr_wqe_pbl {
dma_addr_t paddr;
};
-struct nes_ib_fast_reg_page_list {
- struct ib_fast_reg_page_list ibfrpl;
- struct nes_fast_mr_wqe_pbl nes_wqe_pbl;
- u64 pbl;
-};
-
struct nes_listener {
struct work_struct work;
struct workqueue_struct *wq;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 44cb513f9a87..137880a19ebe 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -51,6 +51,7 @@ atomic_t qps_created;
atomic_t sw_qps_destroyed;
static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
+static int nes_dereg_mr(struct ib_mr *ib_mr);
/**
* nes_alloc_mw
@@ -443,79 +444,46 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd,
} else {
kfree(nesmr);
nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
+ return ERR_PTR(-ENOMEM);
}
+
+ nesmr->pages = pci_alloc_consistent(nesdev->pcidev,
+ max_num_sg * sizeof(u64),
+ &nesmr->paddr);
+ if (!nesmr->paddr)
+ goto err;
+
+ nesmr->max_pages = max_num_sg;
+
return ibmr;
+
+err:
+ nes_dereg_mr(ibmr);
+
+ return ERR_PTR(-ENOMEM);
}
-/*
- * nes_alloc_fast_reg_page_list
- */
-static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
- struct ib_device *ibdev,
- int page_list_len)
+static int nes_set_page(struct ib_mr *ibmr, u64 addr)
{
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct ib_fast_reg_page_list *pifrpl;
- struct nes_ib_fast_reg_page_list *pnesfrpl;
+ struct nes_mr *nesmr = to_nesmr(ibmr);
- if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
- return ERR_PTR(-E2BIG);
- /*
- * Allocate the ib_fast_reg_page_list structure, the
- * nes_fast_bpl structure, and the PLB table.
- */
- pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) +
- page_list_len * sizeof(u64), GFP_KERNEL);
-
- if (!pnesfrpl)
- return ERR_PTR(-ENOMEM);
+ if (unlikely(nesmr->npages == nesmr->max_pages))
+ return -ENOMEM;
- pifrpl = &pnesfrpl->ibfrpl;
- pifrpl->page_list = &pnesfrpl->pbl;
- pifrpl->max_page_list_len = page_list_len;
- /*
- * Allocate the WQE PBL
- */
- pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev,
- page_list_len * sizeof(u64),
- &pnesfrpl->nes_wqe_pbl.paddr);
+ nesmr->pages[nesmr->npages++] = cpu_to_le64(addr);
- if (!pnesfrpl->nes_wqe_pbl.kva) {
- kfree(pnesfrpl);
- return ERR_PTR(-ENOMEM);
- }
- nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, "
- "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, "
- "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl,
- pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva,
- (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr);
-
- return pifrpl;
+ return 0;
}
-/*
- * nes_free_fast_reg_page_list
- */
-static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl)
+static int nes_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
{
- struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_ib_fast_reg_page_list *pnesfrpl;
+ struct nes_mr *nesmr = to_nesmr(ibmr);
- pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl);
- /*
- * Free the WQE PBL.
- */
- pci_free_consistent(nesdev->pcidev,
- pifrpl->max_page_list_len * sizeof(u64),
- pnesfrpl->nes_wqe_pbl.kva,
- pnesfrpl->nes_wqe_pbl.paddr);
- /*
- * Free the PBL structure
- */
- kfree(pnesfrpl);
+ nesmr->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page);
}
/**
@@ -2683,6 +2651,13 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
u16 major_code;
u16 minor_code;
+
+ if (nesmr->pages)
+ pci_free_consistent(nesdev->pcidev,
+ nesmr->max_pages * sizeof(u64),
+ nesmr->pages,
+ nesmr->paddr);
+
if (nesmr->region) {
ib_umem_release(nesmr->region);
}
@@ -3372,9 +3347,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- ib_wr->wr.rdma.rkey);
+ rdma_wr(ib_wr)->rkey);
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- ib_wr->wr.rdma.remote_addr);
+ rdma_wr(ib_wr)->remote_addr);
if ((ib_wr->send_flags & IB_SEND_INLINE) &&
((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
@@ -3409,9 +3384,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
}
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- ib_wr->wr.rdma.remote_addr);
+ rdma_wr(ib_wr)->remote_addr);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- ib_wr->wr.rdma.rkey);
+ rdma_wr(ib_wr)->rkey);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
ib_wr->sg_list->length);
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
@@ -3425,19 +3400,13 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
ib_wr->ex.invalidate_rkey);
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
{
- int i;
- int flags = ib_wr->wr.fast_reg.access_flags;
- struct nes_ib_fast_reg_page_list *pnesfrpl =
- container_of(ib_wr->wr.fast_reg.page_list,
- struct nes_ib_fast_reg_page_list,
- ibfrpl);
- u64 *src_page_list = pnesfrpl->ibfrpl.page_list;
- u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva;
-
- if (ib_wr->wr.fast_reg.page_list_len >
- (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
+ struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr);
+ int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
+ int flags = reg_wr(ib_wr)->access;
+
+ if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
err = -EINVAL;
break;
@@ -3445,19 +3414,19 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
set_wqe_64bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
- ib_wr->wr.fast_reg.iova_start);
+ mr->ibmr.iova);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
- ib_wr->wr.fast_reg.length);
+ mr->ibmr.length);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
- ib_wr->wr.fast_reg.rkey);
- /* Set page size: */
- if (ib_wr->wr.fast_reg.page_shift == 12) {
+ reg_wr(ib_wr)->key);
+
+ if (page_shift == 12) {
wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
- } else if (ib_wr->wr.fast_reg.page_shift == 21) {
+ } else if (page_shift == 21) {
wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
} else {
nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
@@ -3465,6 +3434,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
err = -EINVAL;
break;
}
+
/* Set access_flags */
wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
if (flags & IB_ACCESS_LOCAL_WRITE)
@@ -3480,35 +3450,22 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
/* Fill in PBL info: */
- if (ib_wr->wr.fast_reg.page_list_len >
- pnesfrpl->ibfrpl.max_page_list_len) {
- nes_debug(NES_DBG_IW_TX, "Invalid page list length,"
- " ib_wr=%p, value=%u, max=%u\n",
- ib_wr, ib_wr->wr.fast_reg.page_list_len,
- pnesfrpl->ibfrpl.max_page_list_len);
- err = -EINVAL;
- break;
- }
-
set_wqe_64bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
- pnesfrpl->nes_wqe_pbl.paddr);
+ mr->paddr);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
- ib_wr->wr.fast_reg.page_list_len * 8);
-
- for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++)
- dst_page_list[i] = cpu_to_le64(src_page_list[i]);
+ mr->npages * 8);
- nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, "
+ nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
"length: %d, rkey: %0x, pgl_paddr: %llx, "
"page_list_len: %u, wqe_misc: %x\n",
- (unsigned long long) ib_wr->wr.fast_reg.iova_start,
- ib_wr->wr.fast_reg.length,
- ib_wr->wr.fast_reg.rkey,
- (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr,
- ib_wr->wr.fast_reg.page_list_len,
+ (unsigned long long) mr->ibmr.iova,
+ mr->ibmr.length,
+ reg_wr(ib_wr)->key,
+ (unsigned long long) mr->paddr,
+ mr->npages,
wqe_misc);
break;
}
@@ -3751,7 +3708,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
entry->opcode = IB_WC_LOCAL_INV;
break;
case NES_IWARP_SQ_OP_FAST_REG:
- entry->opcode = IB_WC_FAST_REG_MR;
+ entry->opcode = IB_WC_REG_MR;
break;
}
@@ -3939,8 +3896,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.bind_mw = nes_bind_mw;
nesibdev->ibdev.alloc_mr = nes_alloc_mr;
- nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
- nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
+ nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
nesibdev->ibdev.attach_mcast = nes_multicast_attach;
nesibdev->ibdev.detach_mcast = nes_multicast_detach;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 309b31c31ae1..a204b677af22 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -79,6 +79,10 @@ struct nes_mr {
u16 pbls_used;
u8 mode;
u8 pbl_4k;
+ __le64 *pages;
+ dma_addr_t paddr;
+ u32 max_pages;
+ u32 npages;
};
struct nes_hw_pb {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index b4091ab48db0..ae80590aabdf 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -55,7 +55,7 @@
#include <be_roce.h>
#include "ocrdma_sli.h"
-#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0"
+#define OCRDMA_ROCE_DRV_VERSION "11.0.0.0"
#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -193,6 +193,8 @@ struct ocrdma_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct ocrdma_hw_mr hwmr;
+ u64 *pages;
+ u32 npages;
};
struct ocrdma_stats {
@@ -278,7 +280,6 @@ struct ocrdma_dev {
u32 hba_port_num;
struct list_head entry;
- struct rcu_head rcu;
int id;
u64 *stag_arr;
u8 sl; /* service level */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 44766fee1f4e..9820074be59d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -45,6 +45,7 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_cache.h>
#include "ocrdma.h"
#include "ocrdma_verbs.h"
@@ -56,10 +57,9 @@
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct ib_ah_attr *attr, union ib_gid *sgid,
- int pdid, bool *isvlan)
+ int pdid, bool *isvlan, u16 vlan_tag)
{
int status = 0;
- u16 vlan_tag;
struct ocrdma_eth_vlan eth;
struct ocrdma_grh grh;
int eth_sz;
@@ -68,7 +68,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
memset(&grh, 0, sizeof(grh));
/* VLAN */
- vlan_tag = attr->vlan_id;
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
if (vlan_tag || dev->pfc_state) {
@@ -115,9 +114,11 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
{
u32 *ahid_addr;
- bool isvlan = false;
int status;
struct ocrdma_ah *ah;
+ bool isvlan = false;
+ u16 vlan_tag = 0xffff;
+ struct ib_gid_attr sgid_attr;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
union ib_gid sgid;
@@ -135,18 +136,25 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
if (status)
goto av_err;
- status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid);
+ status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid,
+ &sgid_attr);
if (status) {
pr_err("%s(): Failed to query sgid, status = %d\n",
__func__, status);
goto av_conf_err;
}
+ if (sgid_attr.ndev) {
+ if (is_vlan_dev(sgid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
+ dev_put(sgid_attr.ndev);
+ }
if ((pd->uctx) &&
(!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
(!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
- attr->dmac, &attr->vlan_id);
+ attr->dmac, &vlan_tag,
+ sgid_attr.ndev->ifindex);
if (status) {
pr_err("%s(): Failed to resolve dmac from gid."
"status = %d\n", __func__, status);
@@ -154,7 +162,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
}
}
- status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan);
+ status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag);
if (status)
goto av_conf_err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index aab391a15db4..30f67bebffa3 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -47,6 +47,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_cache.h>
#include "ocrdma.h"
#include "ocrdma_hw.h"
@@ -678,11 +679,33 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
int dev_event = 0;
int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
+ u16 qpid = cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK;
+ u16 cqid = cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK;
- if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID)
- qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK];
- if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
- cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
+ /*
+ * Some FW version returns wrong qp or cq ids in CQEs.
+ * Checking whether the IDs are valid
+ */
+
+ if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) {
+ if (qpid < dev->attr.max_qp)
+ qp = dev->qp_tbl[qpid];
+ if (qp == NULL) {
+ pr_err("ocrdma%d:Async event - qpid %u is not valid\n",
+ dev->id, qpid);
+ return;
+ }
+ }
+
+ if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) {
+ if (cqid < dev->attr.max_cq)
+ cq = dev->cq_tbl[cqid];
+ if (cq == NULL) {
+ pr_err("ocrdma%d:Async event - cqid %u is not valid\n",
+ dev->id, cqid);
+ return;
+ }
+ }
memset(&ib_evt, 0, sizeof(ib_evt));
@@ -2448,6 +2471,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
int status;
struct ib_ah_attr *ah_attr = &attrs->ah_attr;
union ib_gid sgid, zgid;
+ struct ib_gid_attr sgid_attr;
u32 vlan_id = 0xFFFF;
u8 mac_addr[6];
struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
@@ -2466,10 +2490,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
sizeof(cmd->params.dgid));
- status = ocrdma_query_gid(&dev->ibdev, 1,
- ah_attr->grh.sgid_index, &sgid);
- if (status)
- return status;
+
+ status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index,
+ &sgid, &sgid_attr);
+ if (!status && sgid_attr.ndev) {
+ vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+ memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
+ dev_put(sgid_attr.ndev);
+ }
memset(&zgid, 0, sizeof(zgid));
if (!memcmp(&sgid, &zgid, sizeof(zgid)))
@@ -2486,17 +2514,15 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
- if (attr_mask & IB_QP_VID) {
- vlan_id = attrs->vlan_id;
- } else if (dev->pfc_state) {
- vlan_id = 0;
- pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
- dev->id);
- pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
- dev->id);
- }
if (vlan_id < 0x1000) {
+ if (dev->pfc_state) {
+ vlan_id = 0;
+ pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
+ dev->id);
+ pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
+ dev->id);
+ }
cmd->params.vlan_dmac_b4_to_b5 |=
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 87aa55df7c82..62b7009daa6c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -63,8 +63,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
MODULE_AUTHOR("Emulex Corporation");
MODULE_LICENSE("Dual BSD/GPL");
-static LIST_HEAD(ocrdma_dev_list);
-static DEFINE_SPINLOCK(ocrdma_devlist_lock);
static DEFINE_IDR(ocrdma_dev_id);
void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
@@ -182,8 +180,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
dev->ibdev.alloc_mr = ocrdma_alloc_mr;
- dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list;
- dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;
+ dev->ibdev.map_mr_sg = ocrdma_map_mr_sg;
/* mandatory to support user space verbs consumer. */
dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
@@ -325,9 +322,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
goto sysfs_err;
- spin_lock(&ocrdma_devlist_lock);
- list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
- spin_unlock(&ocrdma_devlist_lock);
/* Init stats */
ocrdma_add_port_stats(dev);
/* Interrupt Moderation */
@@ -356,9 +350,8 @@ idr_err:
return NULL;
}
-static void ocrdma_remove_free(struct rcu_head *rcu)
+static void ocrdma_remove_free(struct ocrdma_dev *dev)
{
- struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
idr_remove(&ocrdma_dev_id, dev->id);
kfree(dev->mbx_cmd);
@@ -375,15 +368,9 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
ib_unregister_device(&dev->ibdev);
ocrdma_rem_port_stats(dev);
-
- spin_lock(&ocrdma_devlist_lock);
- list_del_rcu(&dev->entry);
- spin_unlock(&ocrdma_devlist_lock);
-
ocrdma_free_resources(dev);
ocrdma_cleanup_hw(dev);
-
- call_rcu(&dev->rcu, ocrdma_remove_free);
+ ocrdma_remove_free(dev);
}
static int ocrdma_open(struct ocrdma_dev *dev)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 69334e214571..86c303a620c1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -855,9 +855,9 @@ void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
{
if (!dev->dir)
return;
+ debugfs_remove(dev->dir);
mutex_destroy(&dev->stats_lock);
ocrdma_release_stats_mem(dev);
- debugfs_remove(dev->dir);
}
void ocrdma_init_debugfs(void)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 1f3affb6a477..583001bcfb8f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -73,7 +73,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
if (index >= OCRDMA_MAX_SGID)
return -EINVAL;
- ret = ib_get_cached_gid(ibdev, port, index, sgid);
+ ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
if (ret == -EAGAIN) {
memcpy(sgid, &zgid, sizeof(*sgid));
return 0;
@@ -1013,6 +1013,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
(void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
+ kfree(mr->pages);
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
/* it could be user registered memory. */
@@ -1997,13 +1998,13 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
{
struct ocrdma_ewqe_ud_hdr *ud_hdr =
(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
- struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);
+ struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah);
- ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
+ ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn;
if (qp->qp_type == IB_QPT_GSI)
ud_hdr->qkey = qp->qkey;
else
- ud_hdr->qkey = wr->wr.ud.remote_qkey;
+ ud_hdr->qkey = ud_wr(wr)->remote_qkey;
ud_hdr->rsvd_ahid = ah->id;
if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
@@ -2106,9 +2107,9 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
if (status)
return status;
- ext_rw->addr_lo = wr->wr.rdma.remote_addr;
- ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
- ext_rw->lrkey = wr->wr.rdma.rkey;
+ ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
+ ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
+ ext_rw->lrkey = rdma_wr(wr)->rkey;
ext_rw->len = hdr->total_len;
return 0;
}
@@ -2126,46 +2127,12 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
- ext_rw->addr_lo = wr->wr.rdma.remote_addr;
- ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
- ext_rw->lrkey = wr->wr.rdma.rkey;
+ ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
+ ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
+ ext_rw->lrkey = rdma_wr(wr)->rkey;
ext_rw->len = hdr->total_len;
}
-static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl,
- struct ocrdma_hw_mr *hwmr)
-{
- int i;
- u64 buf_addr = 0;
- int num_pbes;
- struct ocrdma_pbe *pbe;
-
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
-
- /* go through the OS phy regions & fill hw pbe entries into pbls. */
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
- /* number of pbes can be more for one OS buf, when
- * buffers are of different sizes.
- * split the ib_buf to one or more pbes.
- */
- buf_addr = wr->wr.fast_reg.page_list->page_list[i];
- pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
- pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
- num_pbes += 1;
- pbe++;
-
- /* if the pbl is full storing the pbes,
- * move to next pbl.
- */
- if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- }
- }
- return;
-}
-
static int get_encoded_page_size(int pg_sz)
{
/* Max size is 256M 4096 << 16 */
@@ -2176,48 +2143,59 @@ static int get_encoded_page_size(int pg_sz)
return i;
}
-
-static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
- struct ib_send_wr *wr)
+static int ocrdma_build_reg(struct ocrdma_qp *qp,
+ struct ocrdma_hdr_wqe *hdr,
+ struct ib_reg_wr *wr)
{
u64 fbo;
struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
- struct ocrdma_mr *mr;
- struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
+ struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr);
+ struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
+ struct ocrdma_pbe *pbe;
u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
+ int num_pbes = 0, i;
wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
- if (wr->wr.fast_reg.page_list_len > dev->attr.max_pages_per_frmr)
- return -EINVAL;
-
hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
- if (wr->wr.fast_reg.page_list_len == 0)
- BUG();
- if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)
+ if (wr->access & IB_ACCESS_LOCAL_WRITE)
hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
- if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)
+ if (wr->access & IB_ACCESS_REMOTE_WRITE)
hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
- if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)
+ if (wr->access & IB_ACCESS_REMOTE_READ)
hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
- hdr->lkey = wr->wr.fast_reg.rkey;
- hdr->total_len = wr->wr.fast_reg.length;
+ hdr->lkey = wr->key;
+ hdr->total_len = mr->ibmr.length;
- fbo = wr->wr.fast_reg.iova_start -
- (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK);
+ fbo = mr->ibmr.iova - mr->pages[0];
- fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start);
- fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff);
+ fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
+ fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
fast_reg->fbo_hi = upper_32_bits(fbo);
fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
- fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
- fast_reg->size_sge =
- get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
- mr = (struct ocrdma_mr *) (unsigned long)
- dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
- build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
+ fast_reg->num_sges = mr->npages;
+ fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size);
+
+ pbe = pbl_tbl->va;
+ for (i = 0; i < mr->npages; i++) {
+ u64 buf_addr = mr->pages[i];
+
+ pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+ pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
+ num_pbes += 1;
+ pbe++;
+
+ /* if the pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ }
+ }
+
return 0;
}
@@ -2300,8 +2278,8 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
hdr->lkey = wr->ex.invalidate_rkey;
break;
- case IB_WR_FAST_REG_MR:
- status = ocrdma_build_fr(qp, hdr, wr);
+ case IB_WR_REG_MR:
+ status = ocrdma_build_reg(qp, hdr, reg_wr(wr));
break;
default:
status = -EINVAL;
@@ -2567,7 +2545,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
ibwc->opcode = IB_WC_SEND;
break;
case OCRDMA_FR_MR:
- ibwc->opcode = IB_WC_FAST_REG_MR;
+ ibwc->opcode = IB_WC_REG_MR;
break;
case OCRDMA_LKEY_INV:
ibwc->opcode = IB_WC_LOCAL_INV;
@@ -2933,16 +2911,11 @@ expand_cqe:
}
stop_cqe:
cq->getp = cur_getp;
- if (cq->deferred_arm) {
- ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol,
- polled_hw_cqes);
+ if (cq->deferred_arm || polled_hw_cqes) {
+ ocrdma_ring_cq_db(dev, cq->id, cq->deferred_arm,
+ cq->deferred_sol, polled_hw_cqes);
cq->deferred_arm = false;
cq->deferred_sol = false;
- } else {
- /* We need to pop the CQE. No need to arm */
- ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
- polled_hw_cqes);
- cq->deferred_sol = false;
}
return i;
@@ -3058,6 +3031,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+ if (!mr->pages) {
+ status = -ENOMEM;
+ goto pl_err;
+ }
+
status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
if (status)
goto pbl_err;
@@ -3081,30 +3060,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
mbx_err:
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
pbl_err:
+ kfree(mr->pages);
+pl_err:
kfree(mr);
return ERR_PTR(-ENOMEM);
}
-struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
- *ibdev,
- int page_list_len)
-{
- struct ib_fast_reg_page_list *frmr_list;
- int size;
-
- size = sizeof(*frmr_list) + (page_list_len * sizeof(u64));
- frmr_list = kzalloc(size, GFP_KERNEL);
- if (!frmr_list)
- return ERR_PTR(-ENOMEM);
- frmr_list->page_list = (u64 *)(frmr_list + 1);
- return frmr_list;
-}
-
-void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
-{
- kfree(page_list);
-}
-
#define MAX_KERNEL_PBE_SIZE 65536
static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
int buf_cnt, u32 *pbe_size)
@@ -3267,3 +3228,26 @@ pbl_err:
kfree(mr);
return ERR_PTR(status);
}
+
+static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
+
+ if (unlikely(mr->npages == mr->hwmr.num_pbes))
+ return -ENOMEM;
+
+ mr->pages[mr->npages++] = addr;
+
+ return 0;
+}
+
+int ocrdma_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
+{
+ struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
+
+ mr->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 308c16857a5d..a2f3b4dc20b0 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -125,9 +125,8 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
- *ibdev,
- int page_list_len);
-void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list);
+int ocrdma_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
#endif /* __OCRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 5afaa218508d..d725c565518d 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -336,14 +336,15 @@ bail:
}
/*
- * Initialize the memory region specified by the work reqeust.
+ * Initialize the memory region specified by the work request.
*/
-int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
+int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr)
{
struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct qib_pd *pd = to_ipd(qp->ibqp.pd);
- struct qib_mregion *mr;
- u32 rkey = wr->wr.fast_reg.rkey;
+ struct qib_mr *mr = to_imr(wr->mr);
+ struct qib_mregion *mrg;
+ u32 key = wr->key;
unsigned i, n, m;
int ret = -EINVAL;
unsigned long flags;
@@ -351,33 +352,33 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
size_t ps;
spin_lock_irqsave(&rkt->lock, flags);
- if (pd->user || rkey == 0)
+ if (pd->user || key == 0)
goto bail;
- mr = rcu_dereference_protected(
- rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))],
+ mrg = rcu_dereference_protected(
+ rkt->table[(key >> (32 - ib_qib_lkey_table_size))],
lockdep_is_held(&rkt->lock));
- if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
+ if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd))
goto bail;
- if (wr->wr.fast_reg.page_list_len > mr->max_segs)
+ if (mr->npages > mrg->max_segs)
goto bail;
- ps = 1UL << wr->wr.fast_reg.page_shift;
- if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
+ ps = mr->ibmr.page_size;
+ if (mr->ibmr.length > ps * mr->npages)
goto bail;
- mr->user_base = wr->wr.fast_reg.iova_start;
- mr->iova = wr->wr.fast_reg.iova_start;
- mr->lkey = rkey;
- mr->length = wr->wr.fast_reg.length;
- mr->access_flags = wr->wr.fast_reg.access_flags;
- page_list = wr->wr.fast_reg.page_list->page_list;
+ mrg->user_base = mr->ibmr.iova;
+ mrg->iova = mr->ibmr.iova;
+ mrg->lkey = key;
+ mrg->length = mr->ibmr.length;
+ mrg->access_flags = wr->access;
+ page_list = mr->pages;
m = 0;
n = 0;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
- mr->map[m]->segs[n].vaddr = (void *) page_list[i];
- mr->map[m]->segs[n].length = ps;
+ for (i = 0; i < mr->npages; i++) {
+ mrg->map[m]->segs[n].vaddr = (void *) page_list[i];
+ mrg->map[m]->segs[n].length = ps;
if (++n == QIB_SEGSZ) {
m++;
n = 0;
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 19220dcb9a3b..294f5c706be9 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -303,6 +303,7 @@ int qib_dereg_mr(struct ib_mr *ibmr)
int ret = 0;
unsigned long timeout;
+ kfree(mr->pages);
qib_free_lkey(&mr->mr);
qib_put_mr(&mr->mr); /* will set completion if last */
@@ -323,7 +324,7 @@ out:
/*
* Allocate a memory region usable with the
- * IB_WR_FAST_REG_MR send work request.
+ * IB_WR_REG_MR send work request.
*
* Return the memory region on success, otherwise return an errno.
*/
@@ -340,37 +341,38 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
if (IS_ERR(mr))
return (struct ib_mr *)mr;
+ mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+ if (!mr->pages)
+ goto err;
+
return &mr->ibmr;
+
+err:
+ qib_dereg_mr(&mr->ibmr);
+ return ERR_PTR(-ENOMEM);
}
-struct ib_fast_reg_page_list *
-qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
+static int qib_set_page(struct ib_mr *ibmr, u64 addr)
{
- unsigned size = page_list_len * sizeof(u64);
- struct ib_fast_reg_page_list *pl;
-
- if (size > PAGE_SIZE)
- return ERR_PTR(-EINVAL);
-
- pl = kzalloc(sizeof(*pl), GFP_KERNEL);
- if (!pl)
- return ERR_PTR(-ENOMEM);
+ struct qib_mr *mr = to_imr(ibmr);
- pl->page_list = kzalloc(size, GFP_KERNEL);
- if (!pl->page_list)
- goto err_free;
+ if (unlikely(mr->npages == mr->mr.max_segs))
+ return -ENOMEM;
- return pl;
+ mr->pages[mr->npages++] = addr;
-err_free:
- kfree(pl);
- return ERR_PTR(-ENOMEM);
+ return 0;
}
-void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl)
+int qib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
{
- kfree(pl->page_list);
- kfree(pl);
+ struct qib_mr *mr = to_imr(ibmr);
+
+ mr->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page);
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 4fa88ba2963e..40f85bb3e0d3 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -436,7 +436,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+ atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
}
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 4544d6f88ad7..e6b7556d5221 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -373,10 +373,11 @@ int qib_make_rc_req(struct qib_qp *qp)
qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
goto bail;
}
+
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
@@ -386,15 +387,15 @@ int qib_make_rc_req(struct qib_qp *qp)
len = pmtu;
break;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE)
qp->s_state = OP(RDMA_WRITE_ONLY);
else {
- qp->s_state =
- OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+ qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after RETH */
- ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+ ohdr->u.rc.imm_data =
+ wqe->rdma_wr.wr.ex.imm_data;
hwords += 1;
- if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
}
bth2 |= IB_BTH_REQ_ACK;
@@ -424,10 +425,11 @@ int qib_make_rc_req(struct qib_qp *qp)
qp->s_next_psn += (len - 1) / pmtu;
wqe->lpsn = qp->s_next_psn++;
}
+
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -455,24 +457,24 @@ int qib_make_rc_req(struct qib_qp *qp)
qp->s_lsn++;
wqe->lpsn = wqe->psn;
}
- if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.swap);
+ wqe->atomic_wr.swap);
ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
} else {
qp->s_state = OP(FETCH_ADD);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
ohdr->u.atomic_eth.compare_data = 0;
}
ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr >> 32);
+ wqe->atomic_wr.remote_addr >> 32);
ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr);
+ wqe->atomic_wr.remote_addr);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
- wqe->wr.wr.atomic.rkey);
+ wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
@@ -597,9 +599,9 @@ int qib_make_rc_req(struct qib_qp *qp)
*/
len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+ cpu_to_be64(wqe->rdma_wr.remote_addr + len);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 22e356ca8058..b1aa21bdd484 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -459,8 +459,8 @@ again:
if (wqe->length == 0)
break;
if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
qp->r_sge.sg_list = NULL;
@@ -472,8 +472,8 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
release = 0;
@@ -490,18 +490,18 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->wr.wr.atomic.remote_addr,
- wqe->wr.wr.atomic.rkey,
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = wqe->wr.wr.atomic.compare_add;
+ sdata = wqe->atomic_wr.compare_add;
*(u64 *) sqp->s_sge.sge.vaddr =
- (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- sdata, wqe->wr.wr.atomic.swap);
+ sdata, wqe->atomic_wr.swap);
qib_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
@@ -785,7 +785,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+ atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index aa3a8035bb68..06a564589c35 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -129,9 +129,9 @@ int qib_make_uc_req(struct qib_qp *qp)
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / 4;
if (len > pmtu) {
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 26243b722b5e..59193f67ea78 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -59,7 +59,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
u32 length;
enum ib_qp_type sqptype, dqptype;
- qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+ qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
return;
@@ -76,7 +76,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
goto drop;
}
- ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
@@ -106,8 +106,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
if (qp->ibqp.qp_num) {
u32 qkey;
- qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
- sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+ qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
+ sqp->qkey : swqe->ud_wr.remote_qkey;
if (unlikely(qkey != qp->qkey)) {
u16 lid;
@@ -210,7 +210,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
wc.qp = &qp->ibqp;
wc.src_qp = sqp->ibqp.qp_num;
wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
- swqe->wr.wr.ud.pkey_index : 0;
+ swqe->ud_wr.pkey_index : 0;
wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
wc.sl = ah_attr->sl;
wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
@@ -277,7 +277,7 @@ int qib_make_ud_req(struct qib_qp *qp)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
if (ah_attr->dlid != QIB_PERMISSIVE_LID)
this_cpu_inc(ibp->pmastats->n_multicast_xmit);
@@ -363,7 +363,7 @@ int qib_make_ud_req(struct qib_qp *qp)
bth0 |= extra_bytes << 20;
bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
- wqe->wr.wr.ud.pkey_index : qp->s_pkey_index);
+ wqe->ud_wr.pkey_index : qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
/*
* Use the multicast QP if the destination LID is a multicast LID.
@@ -371,14 +371,14 @@ int qib_make_ud_req(struct qib_qp *qp)
ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
ah_attr->dlid != QIB_PERMISSIVE_LID ?
cpu_to_be32(QIB_MULTICAST_QPN) :
- cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+ cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
- qp->qkey : wqe->wr.wr.ud.remote_qkey);
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+ qp->qkey : wqe->ud_wr.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 3dcc4985b60f..de6cb6fcda8d 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -362,8 +362,8 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
* undefined operations.
* Make sure buffer is large enough to hold the result for atomics.
*/
- if (wr->opcode == IB_WR_FAST_REG_MR) {
- if (qib_fast_reg_mr(qp, wr))
+ if (wr->opcode == IB_WR_REG_MR) {
+ if (qib_reg_mr(qp, reg_wr(wr)))
goto bail_inval;
} else if (qp->ibqp.qp_type == IB_QPT_UC) {
if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
@@ -374,7 +374,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
wr->opcode != IB_WR_SEND_WITH_IMM)
goto bail_inval;
/* Check UD destination address PD */
- if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
goto bail_inval;
} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
goto bail_inval;
@@ -397,7 +397,23 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
rkt = &to_idev(qp->ibqp.device)->lk_table;
pd = to_ipd(qp->ibqp.pd);
wqe = get_swqe_ptr(qp, qp->s_head);
- wqe->wr = *wr;
+
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+ else if (wr->opcode == IB_WR_REG_MR)
+ memcpy(&wqe->reg_wr, reg_wr(wr),
+ sizeof(wqe->reg_wr));
+ else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE ||
+ wr->opcode == IB_WR_RDMA_READ)
+ memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+ else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+ else
+ memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
wqe->length = 0;
j = 0;
if (wr->num_sge) {
@@ -426,7 +442,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
qp->port_num - 1)->ibmtu)
goto bail_inval_free;
else
- atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
+ atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
wqe->ssn = qp->s_ssn++;
qp->s_head = next;
@@ -2244,8 +2260,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->reg_user_mr = qib_reg_user_mr;
ibdev->dereg_mr = qib_dereg_mr;
ibdev->alloc_mr = qib_alloc_mr;
- ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
- ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
+ ibdev->map_mr_sg = qib_map_mr_sg;
ibdev->alloc_fmr = qib_alloc_fmr;
ibdev->map_phys_fmr = qib_map_phys_fmr;
ibdev->unmap_fmr = qib_unmap_fmr;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index a08df70e8503..2baf5ad251ed 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -330,6 +330,8 @@ struct qib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct qib_mregion mr; /* must be last */
+ u64 *pages;
+ u32 npages;
};
/*
@@ -338,7 +340,13 @@ struct qib_mr {
* in qp->s_max_sge.
*/
struct qib_swqe {
- struct ib_send_wr wr; /* don't use wr.sg_list */
+ union {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ struct ib_ud_wr ud_wr;
+ struct ib_reg_wr reg_wr;
+ struct ib_rdma_wr rdma_wr;
+ struct ib_atomic_wr atomic_wr;
+ };
u32 psn; /* first packet sequence number */
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */
@@ -1038,12 +1046,11 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_entries);
-struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list(
- struct ib_device *ibdev, int page_list_len);
-
-void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
+int qib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
-int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr);
+int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr);
struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 0c15bd885035..565c881a44ba 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -343,16 +343,15 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
netdev = pci_get_drvdata(dev);
us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
- if (IS_ERR_OR_NULL(us_ibdev)) {
+ if (!us_ibdev) {
usnic_err("Device %s context alloc failed\n",
netdev_name(pci_get_drvdata(dev)));
- return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT);
+ return ERR_PTR(-EFAULT);
}
us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
- if (IS_ERR_OR_NULL(us_ibdev->ufdev)) {
- usnic_err("Failed to alloc ufdev for %s with err %ld\n",
- pci_name(dev), PTR_ERR(us_ibdev->ufdev));
+ if (!us_ibdev->ufdev) {
+ usnic_err("Failed to alloc ufdev for %s\n", pci_name(dev));
goto err_dealloc;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index 85dc3f989ff7..fcea3a24d3eb 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -236,8 +236,8 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
/* Create Flow Handle */
qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
- if (IS_ERR_OR_NULL(qp_flow)) {
- err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+ if (!qp_flow) {
+ err = -ENOMEM;
goto out_dealloc_flow;
}
qp_flow->flow = flow;
@@ -311,8 +311,8 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
/* Create qp_flow */
qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
- if (IS_ERR_OR_NULL(qp_flow)) {
- err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+ if (!qp_flow) {
+ err = -ENOMEM;
goto out_dealloc_flow;
}
qp_flow->flow = flow;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index edc5b8565d6d..3ede10309754 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -360,7 +360,7 @@ struct ipoib_dev_priv {
unsigned tx_head;
unsigned tx_tail;
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
- struct ib_send_wr tx_wr;
+ struct ib_ud_wr tx_wr;
unsigned tx_outstanding;
struct ib_wc send_wc[MAX_SEND_CQE];
@@ -528,7 +528,7 @@ static inline void ipoib_build_sge(struct ipoib_dev_priv *priv,
priv->tx_sge[i + off].addr = mapping[i + off];
priv->tx_sge[i + off].length = skb_frag_size(&frags[i]);
}
- priv->tx_wr.num_sge = nr_frags + off;
+ priv->tx_wr.wr.num_sge = nr_frags + off;
}
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index c78dc1638030..3ae9726efb98 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -700,9 +700,9 @@ static inline int post_send(struct ipoib_dev_priv *priv,
ipoib_build_sge(priv, tx_req);
- priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
+ priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM;
- return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
+ return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr);
}
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index d266667ca9b8..5ea0c14070d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -518,19 +518,19 @@ static inline int post_send(struct ipoib_dev_priv *priv,
ipoib_build_sge(priv, tx_req);
- priv->tx_wr.wr_id = wr_id;
- priv->tx_wr.wr.ud.remote_qpn = qpn;
- priv->tx_wr.wr.ud.ah = address;
+ priv->tx_wr.wr.wr_id = wr_id;
+ priv->tx_wr.remote_qpn = qpn;
+ priv->tx_wr.ah = address;
if (head) {
- priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size;
- priv->tx_wr.wr.ud.header = head;
- priv->tx_wr.wr.ud.hlen = hlen;
- priv->tx_wr.opcode = IB_WR_LSO;
+ priv->tx_wr.mss = skb_shinfo(skb)->gso_size;
+ priv->tx_wr.header = head;
+ priv->tx_wr.hlen = hlen;
+ priv->tx_wr.wr.opcode = IB_WR_LSO;
} else
- priv->tx_wr.opcode = IB_WR_SEND;
+ priv->tx_wr.wr.opcode = IB_WR_SEND;
- return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
+ return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
}
void ipoib_send(struct net_device *dev, struct sk_buff *skb,
@@ -583,9 +583,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
}
if (skb->ip_summed == CHECKSUM_PARTIAL)
- priv->tx_wr.send_flags |= IB_SEND_IP_CSUM;
+ priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM;
else
- priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+ priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index babba05d7a0e..7d3281866ffc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -461,7 +461,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
netdev_update_features(dev);
dev_set_mtu(dev, ipoib_cm_max_mtu(dev));
rtnl_unlock();
- priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+ priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
ipoib_flush_paths(dev);
rtnl_lock();
@@ -1860,7 +1860,7 @@ static struct net_device *ipoib_add_port(const char *format,
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
- result = ib_query_gid(hca, port, 0, &priv->local_gid);
+ result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
if (result) {
printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
hca->name, port, result);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index d750a86042f3..f357ca67a41c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -245,7 +245,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
spin_unlock_irq(&priv->lock);
- priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
+ priv->tx_wr.remote_qkey = priv->qkey;
set_qkey = 1;
}
@@ -561,7 +561,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
}
priv->local_lid = port_attr.lid;
- if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
+ if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
ipoib_warn(priv, "ib_query_gid() failed\n");
else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 78845b6e8b81..d48c5bae7877 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -221,9 +221,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
- priv->tx_wr.opcode = IB_WR_SEND;
- priv->tx_wr.sg_list = priv->tx_sge;
- priv->tx_wr.send_flags = IB_SEND_SIGNALED;
+ priv->tx_wr.wr.opcode = IB_WR_SEND;
+ priv->tx_wr.wr.sg_list = priv->tx_sge;
+ priv->tx_wr.wr.send_flags = IB_SEND_SIGNALED;
priv->rx_sge[0].lkey = priv->pd->local_dma_lkey;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index f58ff96b6cbb..9080161e01af 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -111,7 +111,7 @@ module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO);
MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
/*
- * iscsi_iser_recv() - Process a successfull recv completion
+ * iscsi_iser_recv() - Process a successful recv completion
* @conn: iscsi connection
* @hdr: iscsi header
* @rx_data: buffer containing receive data payload
@@ -126,7 +126,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
{
int rc = 0;
int datalen;
- int ahslen;
/* verify PDU length */
datalen = ntoh24(hdr->dlength);
@@ -141,9 +140,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
iser_dbg("aligned datalen (%d) hdr, %d (IB)\n",
datalen, rx_data_len);
- /* read AHS */
- ahslen = hdr->hlength * 4;
-
rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len);
if (rc && rc != ISCSI_ERR_NO_SCSI_CMD)
goto error;
@@ -766,9 +762,7 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
- stats->custom_length = 1;
- strcpy(stats->custom[0].desc, "fmr_unalign_cnt");
- stats->custom[0].value = conn->fmr_unalign_cnt;
+ stats->custom_length = 0;
}
static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
@@ -973,6 +967,13 @@ static umode_t iser_attr_is_visible(int param_type, int param)
return 0;
}
+static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
+{
+ blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
+
+ return 0;
+}
+
static struct scsi_host_template iscsi_iser_sht = {
.module = THIS_MODULE,
.name = "iSCSI Initiator over iSER",
@@ -985,7 +986,8 @@ static struct scsi_host_template iscsi_iser_sht = {
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
.target_alloc = iscsi_target_alloc,
- .use_clustering = DISABLE_CLUSTERING,
+ .use_clustering = ENABLE_CLUSTERING,
+ .slave_alloc = iscsi_iser_slave_alloc,
.proc_name = "iscsi_iser",
.this_id = -1,
.track_queue_depth = 1,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index a5edd6ede692..8a5998e6a407 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -227,18 +227,13 @@ enum iser_data_dir {
* @size: num entries of this sg
* @data_len: total beffer byte len
* @dma_nents: returned by dma_map_sg
- * @orig_sg: pointer to the original sg list (in case
- * we used a copy)
- * @orig_size: num entris of orig sg list
*/
struct iser_data_buf {
struct scatterlist *sg;
- unsigned int size;
+ int size;
unsigned long data_len;
unsigned int dma_nents;
- struct scatterlist *orig_sg;
- unsigned int orig_size;
- };
+};
/* fwd declarations */
struct iser_device;
@@ -300,7 +295,11 @@ struct iser_tx_desc {
int num_sge;
bool mapped;
u8 wr_idx;
- struct ib_send_wr wrs[ISER_MAX_WRS];
+ union iser_wr {
+ struct ib_send_wr send;
+ struct ib_reg_wr fast_reg;
+ struct ib_sig_handover_wr sig;
+ } wrs[ISER_MAX_WRS];
struct iser_mem_reg data_reg;
struct iser_mem_reg prot_reg;
struct ib_sig_attrs sig_attrs;
@@ -413,7 +412,6 @@ struct iser_device {
*
* @mr: memory region
* @fmr_pool: pool of fmrs
- * @frpl: fast reg page list used by frwrs
* @page_vec: fast reg page list used by fmr pool
* @mr_valid: is mr valid indicator
*/
@@ -422,10 +420,7 @@ struct iser_reg_resources {
struct ib_mr *mr;
struct ib_fmr_pool *fmr_pool;
};
- union {
- struct ib_fast_reg_page_list *frpl;
- struct iser_page_vec *page_vec;
- };
+ struct iser_page_vec *page_vec;
u8 mr_valid:1;
};
@@ -712,11 +707,11 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
static inline struct ib_send_wr *
iser_tx_next_wr(struct iser_tx_desc *tx_desc)
{
- struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx];
+ struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx].send;
struct ib_send_wr *last_wr;
if (tx_desc->wr_idx) {
- last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1];
+ last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1].send;
last_wr->next = cur_wr;
}
tx_desc->wr_idx++;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index d511879d8cdf..ffd00c420729 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -661,48 +661,14 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
- int is_rdma_data_aligned = 1;
- int is_rdma_prot_aligned = 1;
int prot_count = scsi_prot_sg_count(iser_task->sc);
- /* if we were reading, copy back to unaligned sglist,
- * anyway dma_unmap and free the copy
- */
- if (iser_task->data[ISER_DIR_IN].orig_sg) {
- is_rdma_data_aligned = 0;
- iser_finalize_rdma_unaligned_sg(iser_task,
- &iser_task->data[ISER_DIR_IN],
- ISER_DIR_IN);
- }
-
- if (iser_task->data[ISER_DIR_OUT].orig_sg) {
- is_rdma_data_aligned = 0;
- iser_finalize_rdma_unaligned_sg(iser_task,
- &iser_task->data[ISER_DIR_OUT],
- ISER_DIR_OUT);
- }
-
- if (iser_task->prot[ISER_DIR_IN].orig_sg) {
- is_rdma_prot_aligned = 0;
- iser_finalize_rdma_unaligned_sg(iser_task,
- &iser_task->prot[ISER_DIR_IN],
- ISER_DIR_IN);
- }
-
- if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
- is_rdma_prot_aligned = 0;
- iser_finalize_rdma_unaligned_sg(iser_task,
- &iser_task->prot[ISER_DIR_OUT],
- ISER_DIR_OUT);
- }
-
if (iser_task->dir[ISER_DIR_IN]) {
iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
- if (is_rdma_data_aligned)
- iser_dma_unmap_task_data(iser_task,
- &iser_task->data[ISER_DIR_IN],
- DMA_FROM_DEVICE);
- if (prot_count && is_rdma_prot_aligned)
+ iser_dma_unmap_task_data(iser_task,
+ &iser_task->data[ISER_DIR_IN],
+ DMA_FROM_DEVICE);
+ if (prot_count)
iser_dma_unmap_task_data(iser_task,
&iser_task->prot[ISER_DIR_IN],
DMA_FROM_DEVICE);
@@ -710,11 +676,10 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
if (iser_task->dir[ISER_DIR_OUT]) {
iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
- if (is_rdma_data_aligned)
- iser_dma_unmap_task_data(iser_task,
- &iser_task->data[ISER_DIR_OUT],
- DMA_TO_DEVICE);
- if (prot_count && is_rdma_prot_aligned)
+ iser_dma_unmap_task_data(iser_task,
+ &iser_task->data[ISER_DIR_OUT],
+ DMA_TO_DEVICE);
+ if (prot_count)
iser_dma_unmap_task_data(iser_task,
&iser_task->prot[ISER_DIR_OUT],
DMA_TO_DEVICE);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 4c46d67d37a1..ea765fb9664d 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -88,113 +88,6 @@ int iser_assign_reg_ops(struct iser_device *device)
return 0;
}
-static void
-iser_free_bounce_sg(struct iser_data_buf *data)
-{
- struct scatterlist *sg;
- int count;
-
- for_each_sg(data->sg, sg, data->size, count)
- __free_page(sg_page(sg));
-
- kfree(data->sg);
-
- data->sg = data->orig_sg;
- data->size = data->orig_size;
- data->orig_sg = NULL;
- data->orig_size = 0;
-}
-
-static int
-iser_alloc_bounce_sg(struct iser_data_buf *data)
-{
- struct scatterlist *sg;
- struct page *page;
- unsigned long length = data->data_len;
- int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
-
- sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
- if (!sg)
- goto err;
-
- sg_init_table(sg, nents);
- while (length) {
- u32 page_len = min_t(u32, length, PAGE_SIZE);
-
- page = alloc_page(GFP_ATOMIC);
- if (!page)
- goto err;
-
- sg_set_page(&sg[i], page, page_len, 0);
- length -= page_len;
- i++;
- }
-
- data->orig_sg = data->sg;
- data->orig_size = data->size;
- data->sg = sg;
- data->size = nents;
-
- return 0;
-
-err:
- for (; i > 0; i--)
- __free_page(sg_page(&sg[i - 1]));
- kfree(sg);
-
- return -ENOMEM;
-}
-
-static void
-iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
-{
- struct scatterlist *osg, *bsg = data->sg;
- void *oaddr, *baddr;
- unsigned int left = data->data_len;
- unsigned int bsg_off = 0;
- int i;
-
- for_each_sg(data->orig_sg, osg, data->orig_size, i) {
- unsigned int copy_len, osg_off = 0;
-
- oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
- copy_len = min(left, osg->length);
- while (copy_len) {
- unsigned int len = min(copy_len, bsg->length - bsg_off);
-
- baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
- if (to_buffer)
- memcpy(baddr + bsg_off, oaddr + osg_off, len);
- else
- memcpy(oaddr + osg_off, baddr + bsg_off, len);
-
- kunmap_atomic(baddr - bsg->offset);
- osg_off += len;
- bsg_off += len;
- copy_len -= len;
-
- if (bsg_off >= bsg->length) {
- bsg = sg_next(bsg);
- bsg_off = 0;
- }
- }
- kunmap_atomic(oaddr - osg->offset);
- left -= osg_off;
- }
-}
-
-static inline void
-iser_copy_from_bounce(struct iser_data_buf *data)
-{
- iser_copy_bounce(data, false);
-}
-
-static inline void
-iser_copy_to_bounce(struct iser_data_buf *data)
-{
- iser_copy_bounce(data, true);
-}
-
struct iser_fr_desc *
iser_reg_desc_get_fr(struct ib_conn *ib_conn)
{
@@ -238,62 +131,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
{
}
-/**
- * iser_start_rdma_unaligned_sg
- */
-static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *data,
- enum iser_data_dir cmd_dir)
-{
- struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
- int rc;
-
- rc = iser_alloc_bounce_sg(data);
- if (rc) {
- iser_err("Failed to allocate bounce for data len %lu\n",
- data->data_len);
- return rc;
- }
-
- if (cmd_dir == ISER_DIR_OUT)
- iser_copy_to_bounce(data);
-
- data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
- (cmd_dir == ISER_DIR_OUT) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (!data->dma_nents) {
- iser_err("Got dma_nents %d, something went wrong...\n",
- data->dma_nents);
- rc = -ENOMEM;
- goto err;
- }
-
- return 0;
-err:
- iser_free_bounce_sg(data);
- return rc;
-}
-
-/**
- * iser_finalize_rdma_unaligned_sg
- */
-
-void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *data,
- enum iser_data_dir cmd_dir)
-{
- struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
-
- ib_dma_unmap_sg(dev, data->sg, data->size,
- (cmd_dir == ISER_DIR_OUT) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
- if (cmd_dir == ISER_DIR_IN)
- iser_copy_from_bounce(data);
-
- iser_free_bounce_sg(data);
-}
-
#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
/**
@@ -355,64 +192,6 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
return cur_page;
}
-
-/**
- * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
- * for RDMA sub-list of a scatter-gather list of memory buffers, and returns
- * the number of entries which are aligned correctly. Supports the case where
- * consecutive SG elements are actually fragments of the same physcial page.
- */
-static int iser_data_buf_aligned_len(struct iser_data_buf *data,
- struct ib_device *ibdev,
- unsigned sg_tablesize)
-{
- struct scatterlist *sg, *sgl, *next_sg = NULL;
- u64 start_addr, end_addr;
- int i, ret_len, start_check = 0;
-
- if (data->dma_nents == 1)
- return 1;
-
- sgl = data->sg;
- start_addr = ib_sg_dma_address(ibdev, sgl);
-
- if (unlikely(sgl[0].offset &&
- data->data_len >= sg_tablesize * PAGE_SIZE)) {
- iser_dbg("can't register length %lx with offset %x "
- "fall to bounce buffer\n", data->data_len,
- sgl[0].offset);
- return 0;
- }
-
- for_each_sg(sgl, sg, data->dma_nents, i) {
- if (start_check && !IS_4K_ALIGNED(start_addr))
- break;
-
- next_sg = sg_next(sg);
- if (!next_sg)
- break;
-
- end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
- start_addr = ib_sg_dma_address(ibdev, next_sg);
-
- if (end_addr == start_addr) {
- start_check = 0;
- continue;
- } else
- start_check = 1;
-
- if (!IS_4K_ALIGNED(end_addr))
- break;
- }
- ret_len = (next_sg) ? i : i+1;
-
- if (unlikely(ret_len != data->dma_nents))
- iser_warn("rdma alignment violation (%d/%d aligned)\n",
- ret_len, data->dma_nents);
-
- return ret_len;
-}
-
static void iser_data_buf_dump(struct iser_data_buf *data,
struct ib_device *ibdev)
{
@@ -483,31 +262,6 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
return 0;
}
-static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *mem,
- enum iser_data_dir cmd_dir)
-{
- struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
- struct iser_device *device = iser_task->iser_conn->ib_conn.device;
-
- iscsi_conn->fmr_unalign_cnt++;
-
- if (iser_debug_level > 0)
- iser_data_buf_dump(mem, device->ib_device);
-
- /* unmap the command data before accessing it */
- iser_dma_unmap_task_data(iser_task, mem,
- (cmd_dir == ISER_DIR_OUT) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
- /* allocate copy buf, if we are writing, copy the */
- /* unaligned scatterlist, dma map the copy */
- if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
- return -ENOMEM;
-
- return 0;
-}
-
/**
* iser_reg_page_vec - Register physical memory
*
@@ -683,7 +437,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
- struct ib_send_wr *wr;
+ struct ib_sig_handover_wr *wr;
int ret;
memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -693,26 +447,24 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
- if (!pi_ctx->sig_mr_valid) {
- wr = iser_tx_next_wr(tx_desc);
- iser_inv_rkey(wr, pi_ctx->sig_mr);
- }
-
- wr = iser_tx_next_wr(tx_desc);
- wr->opcode = IB_WR_REG_SIG_MR;
- wr->wr_id = ISER_FASTREG_LI_WRID;
- wr->sg_list = &data_reg->sge;
- wr->num_sge = 1;
- wr->send_flags = 0;
- wr->wr.sig_handover.sig_attrs = sig_attrs;
- wr->wr.sig_handover.sig_mr = pi_ctx->sig_mr;
+ if (!pi_ctx->sig_mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
+
+ wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
+ wr->wr.opcode = IB_WR_REG_SIG_MR;
+ wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.sg_list = &data_reg->sge;
+ wr->wr.num_sge = 1;
+ wr->wr.send_flags = 0;
+ wr->sig_attrs = sig_attrs;
+ wr->sig_mr = pi_ctx->sig_mr;
if (scsi_prot_sg_count(iser_task->sc))
- wr->wr.sig_handover.prot = &prot_reg->sge;
+ wr->prot = &prot_reg->sge;
else
- wr->wr.sig_handover.prot = NULL;
- wr->wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
+ wr->prot = NULL;
+ wr->access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
pi_ctx->sig_mr_valid = 0;
sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
@@ -720,7 +472,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
sig_reg->sge.addr = 0;
sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
- iser_dbg("sig reg: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
+ iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n",
sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
sig_reg->sge.length);
err:
@@ -732,69 +484,41 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_reg_resources *rsc,
struct iser_mem_reg *reg)
{
- struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
- struct iser_device *device = ib_conn->device;
- struct ib_mr *mr = rsc->mr;
- struct ib_fast_reg_page_list *frpl = rsc->frpl;
struct iser_tx_desc *tx_desc = &iser_task->desc;
- struct ib_send_wr *wr;
- int offset, size, plen;
+ struct ib_mr *mr = rsc->mr;
+ struct ib_reg_wr *wr;
+ int n;
- plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
- &offset, &size);
- if (plen * SIZE_4K < size) {
- iser_err("fast reg page_list too short to hold this SG\n");
- return -EINVAL;
- }
+ if (!rsc->mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
- if (!rsc->mr_valid) {
- wr = iser_tx_next_wr(tx_desc);
- iser_inv_rkey(wr, mr);
+ n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
+ if (unlikely(n != mem->size)) {
+ iser_err("failed to map sg (%d/%d)\n",
+ n, mem->size);
+ return n < 0 ? n : -EINVAL;
}
- wr = iser_tx_next_wr(tx_desc);
- wr->opcode = IB_WR_FAST_REG_MR;
- wr->wr_id = ISER_FASTREG_LI_WRID;
- wr->send_flags = 0;
- wr->wr.fast_reg.iova_start = frpl->page_list[0] + offset;
- wr->wr.fast_reg.page_list = frpl;
- wr->wr.fast_reg.page_list_len = plen;
- wr->wr.fast_reg.page_shift = SHIFT_4K;
- wr->wr.fast_reg.length = size;
- wr->wr.fast_reg.rkey = mr->rkey;
- wr->wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
+ wr = reg_wr(iser_tx_next_wr(tx_desc));
+ wr->wr.opcode = IB_WR_REG_MR;
+ wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.send_flags = 0;
+ wr->wr.num_sge = 0;
+ wr->mr = mr;
+ wr->key = mr->rkey;
+ wr->access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ;
+
rsc->mr_valid = 0;
reg->sge.lkey = mr->lkey;
reg->rkey = mr->rkey;
- reg->sge.addr = frpl->page_list[0] + offset;
- reg->sge.length = size;
+ reg->sge.addr = mr->iova;
+ reg->sge.length = mr->length;
- iser_dbg("fast reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
- " length=0x%x\n", reg->sge.lkey, reg->rkey,
- reg->sge.addr, reg->sge.length);
-
- return 0;
-}
-
-static int
-iser_handle_unaligned_buf(struct iscsi_iser_task *task,
- struct iser_data_buf *mem,
- enum iser_data_dir dir)
-{
- struct iser_conn *iser_conn = task->iser_conn;
- struct iser_device *device = iser_conn->ib_conn.device;
- int err, aligned_len;
-
- aligned_len = iser_data_buf_aligned_len(mem, device->ib_device,
- iser_conn->scsi_sg_tablesize);
- if (aligned_len != mem->dma_nents) {
- err = fall_to_bounce_buf(task, mem, dir);
- if (err)
- return err;
- }
+ iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=0x%x\n",
+ reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length);
return 0;
}
@@ -841,10 +565,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
bool use_dma_key;
int err;
- err = iser_handle_unaligned_buf(task, mem, dir);
- if (unlikely(err))
- return err;
-
use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
@@ -867,10 +587,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
if (scsi_prot_sg_count(task->sc)) {
mem = &task->prot[dir];
- err = iser_handle_unaligned_buf(task, mem, dir);
- if (unlikely(err))
- goto err_reg;
-
err = iser_reg_prot_sg(task, mem, desc,
use_dma_key, prot_reg);
if (unlikely(err))
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 85132d867bc8..a93070210109 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -293,35 +293,21 @@ iser_alloc_reg_res(struct ib_device *ib_device,
{
int ret;
- res->frpl = ib_alloc_fast_reg_page_list(ib_device, size);
- if (IS_ERR(res->frpl)) {
- ret = PTR_ERR(res->frpl);
- iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
- ret);
- return PTR_ERR(res->frpl);
- }
-
res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size);
if (IS_ERR(res->mr)) {
ret = PTR_ERR(res->mr);
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
- goto fast_reg_mr_failure;
+ return ret;
}
res->mr_valid = 1;
return 0;
-
-fast_reg_mr_failure:
- ib_free_fast_reg_page_list(res->frpl);
-
- return ret;
}
static void
iser_free_reg_res(struct iser_reg_resources *rsc)
{
ib_dereg_mr(rsc->mr);
- ib_free_fast_reg_page_list(rsc->frpl);
}
static int
@@ -1017,7 +1003,7 @@ int iser_connect(struct iser_conn *iser_conn,
ib_conn->beacon.wr_id = ISER_BEACON_WRID;
ib_conn->beacon.opcode = IB_WR_SEND;
- ib_conn->cma_id = rdma_create_id(iser_cma_handler,
+ ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
(void *)iser_conn,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ib_conn->cma_id)) {
@@ -1135,7 +1121,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
wr->opcode = IB_WR_SEND;
wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
- ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0], &bad_wr);
+ ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr);
if (ib_ret)
iser_err("ib_post_send failed, ret:%d opcode:%d\n",
ib_ret, bad_wr->opcode);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index aa59037d7504..dfbbbb28090b 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -473,10 +473,8 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
list_for_each_entry_safe(fr_desc, tmp,
&isert_conn->fr_pool, list) {
list_del(&fr_desc->list);
- ib_free_fast_reg_page_list(fr_desc->data_frpl);
ib_dereg_mr(fr_desc->data_mr);
if (fr_desc->pi_ctx) {
- ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
ib_dereg_mr(fr_desc->pi_ctx->sig_mr);
kfree(fr_desc->pi_ctx);
@@ -504,22 +502,13 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
return -ENOMEM;
}
- pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(pi_ctx->prot_frpl)) {
- isert_err("Failed to allocate prot frpl err=%ld\n",
- PTR_ERR(pi_ctx->prot_frpl));
- ret = PTR_ERR(pi_ctx->prot_frpl);
- goto err_pi_ctx;
- }
-
pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(pi_ctx->prot_mr)) {
isert_err("Failed to allocate prot frmr err=%ld\n",
PTR_ERR(pi_ctx->prot_mr));
ret = PTR_ERR(pi_ctx->prot_mr);
- goto err_prot_frpl;
+ goto err_pi_ctx;
}
desc->ind |= ISERT_PROT_KEY_VALID;
@@ -539,8 +528,6 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
err_prot_mr:
ib_dereg_mr(pi_ctx->prot_mr);
-err_prot_frpl:
- ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
err_pi_ctx:
kfree(pi_ctx);
@@ -551,34 +538,18 @@ static int
isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
struct fast_reg_descriptor *fr_desc)
{
- int ret;
-
- fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(fr_desc->data_frpl)) {
- isert_err("Failed to allocate data frpl err=%ld\n",
- PTR_ERR(fr_desc->data_frpl));
- return PTR_ERR(fr_desc->data_frpl);
- }
-
fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(fr_desc->data_mr)) {
isert_err("Failed to allocate data frmr err=%ld\n",
PTR_ERR(fr_desc->data_mr));
- ret = PTR_ERR(fr_desc->data_mr);
- goto err_data_frpl;
+ return PTR_ERR(fr_desc->data_mr);
}
fr_desc->ind |= ISERT_DATA_KEY_VALID;
isert_dbg("Created fr_desc %p\n", fr_desc);
return 0;
-
-err_data_frpl:
- ib_free_fast_reg_page_list(fr_desc->data_frpl);
-
- return ret;
}
static int
@@ -1579,7 +1550,6 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
struct iser_hdr *iser_hdr = &rx_desc->iser_header;
uint64_t read_va = 0, write_va = 0;
uint32_t read_stag = 0, write_stag = 0;
- int rc;
switch (iser_hdr->flags & 0xF0) {
case ISCSI_CTRL:
@@ -1606,8 +1576,8 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
break;
}
- rc = isert_rx_opcode(isert_conn, rx_desc,
- read_stag, read_va, write_stag, write_va);
+ isert_rx_opcode(isert_conn, rx_desc,
+ read_stag, read_va, write_stag, write_va);
}
static void
@@ -1716,10 +1686,10 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
isert_unmap_data_buf(isert_conn, &wr->data);
}
- if (wr->send_wr) {
+ if (wr->rdma_wr) {
isert_dbg("Cmd %p free send_wr\n", isert_cmd);
- kfree(wr->send_wr);
- wr->send_wr = NULL;
+ kfree(wr->rdma_wr);
+ wr->rdma_wr = NULL;
}
if (wr->ib_sge) {
@@ -1754,7 +1724,7 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
}
wr->ib_sge = NULL;
- wr->send_wr = NULL;
+ wr->rdma_wr = NULL;
}
static void
@@ -1923,7 +1893,7 @@ isert_completion_rdma_write(struct iser_tx_desc *tx_desc,
}
device->unreg_rdma_mem(isert_cmd, isert_conn);
- wr->send_wr_num = 0;
+ wr->rdma_wr_num = 0;
if (ret)
transport_send_check_condition_and_sense(se_cmd,
se_cmd->pi_err, 0);
@@ -1951,7 +1921,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
iscsit_stop_dataout_timer(cmd);
device->unreg_rdma_mem(isert_cmd, isert_conn);
cmd->write_data_done = wr->data.len;
- wr->send_wr_num = 0;
+ wr->rdma_wr_num = 0;
isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
spin_lock_bh(&cmd->istate_lock);
@@ -2403,7 +2373,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
static int
isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
- struct ib_sge *ib_sge, struct ib_send_wr *send_wr,
+ struct ib_sge *ib_sge, struct ib_rdma_wr *rdma_wr,
u32 data_left, u32 offset)
{
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
@@ -2418,8 +2388,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge);
page_off = offset % PAGE_SIZE;
- send_wr->sg_list = ib_sge;
- send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
+ rdma_wr->wr.sg_list = ib_sge;
+ rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc;
/*
* Perform mapping of TCM scatterlist memory ib_sge dma_addr.
*/
@@ -2444,11 +2414,11 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
}
- send_wr->num_sge = ++i;
+ rdma_wr->wr.num_sge = ++i;
isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
- send_wr->sg_list, send_wr->num_sge);
+ rdma_wr->wr.sg_list, rdma_wr->wr.num_sge);
- return send_wr->num_sge;
+ return rdma_wr->wr.num_sge;
}
static int
@@ -2459,7 +2429,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
struct isert_data_buf *data = &wr->data;
- struct ib_send_wr *send_wr;
+ struct ib_rdma_wr *rdma_wr;
struct ib_sge *ib_sge;
u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
int ret = 0, i, ib_sge_cnt;
@@ -2484,11 +2454,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
}
wr->ib_sge = ib_sge;
- wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
- wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
+ wr->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
+ wr->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) * wr->rdma_wr_num,
GFP_KERNEL);
- if (!wr->send_wr) {
- isert_dbg("Unable to allocate wr->send_wr\n");
+ if (!wr->rdma_wr) {
+ isert_dbg("Unable to allocate wr->rdma_wr\n");
ret = -ENOMEM;
goto unmap_cmd;
}
@@ -2496,31 +2466,31 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
wr->isert_cmd = isert_cmd;
rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
- for (i = 0; i < wr->send_wr_num; i++) {
- send_wr = &isert_cmd->rdma_wr.send_wr[i];
+ for (i = 0; i < wr->rdma_wr_num; i++) {
+ rdma_wr = &isert_cmd->rdma_wr.rdma_wr[i];
data_len = min(data_left, rdma_write_max);
- send_wr->send_flags = 0;
+ rdma_wr->wr.send_flags = 0;
if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
- send_wr->opcode = IB_WR_RDMA_WRITE;
- send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset;
- send_wr->wr.rdma.rkey = isert_cmd->read_stag;
- if (i + 1 == wr->send_wr_num)
- send_wr->next = &isert_cmd->tx_desc.send_wr;
+ rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+ rdma_wr->remote_addr = isert_cmd->read_va + offset;
+ rdma_wr->rkey = isert_cmd->read_stag;
+ if (i + 1 == wr->rdma_wr_num)
+ rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr;
else
- send_wr->next = &wr->send_wr[i + 1];
+ rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr;
} else {
- send_wr->opcode = IB_WR_RDMA_READ;
- send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset;
- send_wr->wr.rdma.rkey = isert_cmd->write_stag;
- if (i + 1 == wr->send_wr_num)
- send_wr->send_flags = IB_SEND_SIGNALED;
+ rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+ rdma_wr->remote_addr = isert_cmd->write_va + va_offset;
+ rdma_wr->rkey = isert_cmd->write_stag;
+ if (i + 1 == wr->rdma_wr_num)
+ rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
else
- send_wr->next = &wr->send_wr[i + 1];
+ rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr;
}
ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
- send_wr, data_len, offset);
+ rdma_wr, data_len, offset);
ib_sge += ib_sge_cnt;
offset += data_len;
@@ -2535,45 +2505,6 @@ unmap_cmd:
return ret;
}
-static int
-isert_map_fr_pagelist(struct ib_device *ib_dev,
- struct scatterlist *sg_start, int sg_nents, u64 *fr_pl)
-{
- u64 start_addr, end_addr, page, chunk_start = 0;
- struct scatterlist *tmp_sg;
- int i = 0, new_chunk, last_ent, n_pages;
-
- n_pages = 0;
- new_chunk = 1;
- last_ent = sg_nents - 1;
- for_each_sg(sg_start, tmp_sg, sg_nents, i) {
- start_addr = ib_sg_dma_address(ib_dev, tmp_sg);
- if (new_chunk)
- chunk_start = start_addr;
- end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
-
- isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n",
- i, (unsigned long long)tmp_sg->dma_address,
- tmp_sg->length);
-
- if ((end_addr & ~PAGE_MASK) && i < last_ent) {
- new_chunk = 0;
- continue;
- }
- new_chunk = 1;
-
- page = chunk_start & PAGE_MASK;
- do {
- fr_pl[n_pages++] = page;
- isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n",
- n_pages - 1, page);
- page += PAGE_SIZE;
- } while (page < end_addr);
- }
-
- return n_pages;
-}
-
static inline void
isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
@@ -2599,11 +2530,9 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
struct isert_device *device = isert_conn->device;
struct ib_device *ib_dev = device->ib_device;
struct ib_mr *mr;
- struct ib_fast_reg_page_list *frpl;
- struct ib_send_wr fr_wr, inv_wr;
- struct ib_send_wr *bad_wr, *wr = NULL;
- int ret, pagelist_len;
- u32 page_off;
+ struct ib_reg_wr reg_wr;
+ struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
+ int ret, n;
if (mem->dma_nents == 1) {
sge->lkey = device->pd->local_dma_lkey;
@@ -2614,45 +2543,41 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
return 0;
}
- if (ind == ISERT_DATA_KEY_VALID) {
+ if (ind == ISERT_DATA_KEY_VALID)
/* Registering data buffer */
mr = fr_desc->data_mr;
- frpl = fr_desc->data_frpl;
- } else {
+ else
/* Registering protection buffer */
mr = fr_desc->pi_ctx->prot_mr;
- frpl = fr_desc->pi_ctx->prot_frpl;
- }
-
- page_off = mem->offset % PAGE_SIZE;
-
- isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
- fr_desc, mem->nents, mem->offset);
-
- pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents,
- &frpl->page_list[0]);
if (!(fr_desc->ind & ind)) {
isert_inv_rkey(&inv_wr, mr);
wr = &inv_wr;
}
- /* Prepare FASTREG WR */
- memset(&fr_wr, 0, sizeof(fr_wr));
- fr_wr.wr_id = ISER_FASTREG_LI_WRID;
- fr_wr.opcode = IB_WR_FAST_REG_MR;
- fr_wr.wr.fast_reg.iova_start = frpl->page_list[0] + page_off;
- fr_wr.wr.fast_reg.page_list = frpl;
- fr_wr.wr.fast_reg.page_list_len = pagelist_len;
- fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- fr_wr.wr.fast_reg.length = mem->len;
- fr_wr.wr.fast_reg.rkey = mr->rkey;
- fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
+ n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE);
+ if (unlikely(n != mem->nents)) {
+ isert_err("failed to map mr sg (%d/%d)\n",
+ n, mem->nents);
+ return n < 0 ? n : -EINVAL;
+ }
+
+ isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
+ fr_desc, mem->nents, mem->offset);
+
+ reg_wr.wr.next = NULL;
+ reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
+ reg_wr.wr.send_flags = 0;
+ reg_wr.wr.num_sge = 0;
+ reg_wr.mr = mr;
+ reg_wr.key = mr->lkey;
+ reg_wr.access = IB_ACCESS_LOCAL_WRITE;
if (!wr)
- wr = &fr_wr;
+ wr = &reg_wr.wr;
else
- wr->next = &fr_wr;
+ wr->next = &reg_wr.wr;
ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
if (ret) {
@@ -2662,8 +2587,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
fr_desc->ind &= ~ind;
sge->lkey = mr->lkey;
- sge->addr = frpl->page_list[0] + page_off;
- sge->length = mem->len;
+ sge->addr = mr->iova;
+ sge->length = mr->length;
isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
sge->addr, sge->length, sge->lkey);
@@ -2733,8 +2658,8 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
struct isert_rdma_wr *rdma_wr,
struct fast_reg_descriptor *fr_desc)
{
- struct ib_send_wr sig_wr, inv_wr;
- struct ib_send_wr *bad_wr, *wr = NULL;
+ struct ib_sig_handover_wr sig_wr;
+ struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
struct pi_context *pi_ctx = fr_desc->pi_ctx;
struct ib_sig_attrs sig_attrs;
int ret;
@@ -2752,20 +2677,20 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
}
memset(&sig_wr, 0, sizeof(sig_wr));
- sig_wr.opcode = IB_WR_REG_SIG_MR;
- sig_wr.wr_id = ISER_FASTREG_LI_WRID;
- sig_wr.sg_list = &rdma_wr->ib_sg[DATA];
- sig_wr.num_sge = 1;
- sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE;
- sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
- sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
+ sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
+ sig_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
+ sig_wr.wr.sg_list = &rdma_wr->ib_sg[DATA];
+ sig_wr.wr.num_sge = 1;
+ sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
+ sig_wr.sig_attrs = &sig_attrs;
+ sig_wr.sig_mr = pi_ctx->sig_mr;
if (se_cmd->t_prot_sg)
- sig_wr.wr.sig_handover.prot = &rdma_wr->ib_sg[PROT];
+ sig_wr.prot = &rdma_wr->ib_sg[PROT];
if (!wr)
- wr = &sig_wr;
+ wr = &sig_wr.wr;
else
- wr->next = &sig_wr;
+ wr->next = &sig_wr.wr;
ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
if (ret) {
@@ -2859,7 +2784,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
struct fast_reg_descriptor *fr_desc = NULL;
- struct ib_send_wr *send_wr;
+ struct ib_rdma_wr *rdma_wr;
struct ib_sge *ib_sg;
u32 offset;
int ret = 0;
@@ -2900,26 +2825,26 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg));
wr->ib_sge = &wr->s_ib_sge;
- wr->send_wr_num = 1;
- memset(&wr->s_send_wr, 0, sizeof(*send_wr));
- wr->send_wr = &wr->s_send_wr;
+ wr->rdma_wr_num = 1;
+ memset(&wr->s_rdma_wr, 0, sizeof(wr->s_rdma_wr));
+ wr->rdma_wr = &wr->s_rdma_wr;
wr->isert_cmd = isert_cmd;
- send_wr = &isert_cmd->rdma_wr.s_send_wr;
- send_wr->sg_list = &wr->s_ib_sge;
- send_wr->num_sge = 1;
- send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
+ rdma_wr = &isert_cmd->rdma_wr.s_rdma_wr;
+ rdma_wr->wr.sg_list = &wr->s_ib_sge;
+ rdma_wr->wr.num_sge = 1;
+ rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc;
if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
- send_wr->opcode = IB_WR_RDMA_WRITE;
- send_wr->wr.rdma.remote_addr = isert_cmd->read_va;
- send_wr->wr.rdma.rkey = isert_cmd->read_stag;
- send_wr->send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
+ rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+ rdma_wr->remote_addr = isert_cmd->read_va;
+ rdma_wr->rkey = isert_cmd->read_stag;
+ rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
0 : IB_SEND_SIGNALED;
} else {
- send_wr->opcode = IB_WR_RDMA_READ;
- send_wr->wr.rdma.remote_addr = isert_cmd->write_va;
- send_wr->wr.rdma.rkey = isert_cmd->write_stag;
- send_wr->send_flags = IB_SEND_SIGNALED;
+ rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+ rdma_wr->remote_addr = isert_cmd->write_va;
+ rdma_wr->rkey = isert_cmd->write_stag;
+ rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
}
return 0;
@@ -2967,8 +2892,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
isert_init_send_wr(isert_conn, isert_cmd,
&isert_cmd->tx_desc.send_wr);
- isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
- wr->send_wr_num += 1;
+ isert_cmd->rdma_wr.s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr;
+ wr->rdma_wr_num += 1;
rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
if (rc) {
@@ -2977,7 +2902,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
}
}
- rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
+ rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed);
if (rc)
isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
@@ -3011,7 +2936,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
return rc;
}
- rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
+ rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed);
if (rc)
isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
@@ -3097,7 +3022,7 @@ isert_setup_id(struct isert_np *isert_np)
sa = (struct sockaddr *)&np->np_sockaddr;
isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa);
- id = rdma_create_id(isert_cma_handler, isert_np,
+ id = rdma_create_id(&init_net, isert_cma_handler, isert_np,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(id)) {
isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id));
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index c5b99bcecbcf..3d7fbc47c343 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -84,14 +84,12 @@ enum isert_indicator {
struct pi_context {
struct ib_mr *prot_mr;
- struct ib_fast_reg_page_list *prot_frpl;
struct ib_mr *sig_mr;
};
struct fast_reg_descriptor {
struct list_head list;
struct ib_mr *data_mr;
- struct ib_fast_reg_page_list *data_frpl;
u8 ind;
struct pi_context *pi_ctx;
};
@@ -117,9 +115,9 @@ struct isert_rdma_wr {
enum iser_ib_op_code iser_ib_op;
struct ib_sge *ib_sge;
struct ib_sge s_ib_sge;
- int send_wr_num;
- struct ib_send_wr *send_wr;
- struct ib_send_wr s_send_wr;
+ int rdma_wr_num;
+ struct ib_rdma_wr *rdma_wr;
+ struct ib_rdma_wr s_rdma_wr;
struct ib_sge ib_sg[3];
struct isert_data_buf data;
struct isert_data_buf prot;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index b481490ad257..32f79624dd28 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -340,8 +340,6 @@ static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
return;
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
- if (d->frpl)
- ib_free_fast_reg_page_list(d->frpl);
if (d->mr)
ib_dereg_mr(d->mr);
}
@@ -362,7 +360,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
struct srp_fr_pool *pool;
struct srp_fr_desc *d;
struct ib_mr *mr;
- struct ib_fast_reg_page_list *frpl;
int i, ret = -EINVAL;
if (pool_size <= 0)
@@ -385,12 +382,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
goto destroy_pool;
}
d->mr = mr;
- frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
- if (IS_ERR(frpl)) {
- ret = PTR_ERR(frpl);
- goto destroy_pool;
- }
- d->frpl = frpl;
list_add_tail(&d->entry, &pool->free_list);
}
@@ -849,11 +840,12 @@ static void srp_free_req_data(struct srp_target_port *target,
for (i = 0; i < target->req_ring_size; ++i) {
req = &ch->req_ring[i];
- if (dev->use_fast_reg)
+ if (dev->use_fast_reg) {
kfree(req->fr_list);
- else
+ } else {
kfree(req->fmr_list);
- kfree(req->map_page);
+ kfree(req->map_page);
+ }
if (req->indirect_dma_addr) {
ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
target->indirect_size,
@@ -887,14 +879,15 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
GFP_KERNEL);
if (!mr_list)
goto out;
- if (srp_dev->use_fast_reg)
+ if (srp_dev->use_fast_reg) {
req->fr_list = mr_list;
- else
+ } else {
req->fmr_list = mr_list;
- req->map_page = kmalloc(srp_dev->max_pages_per_mr *
- sizeof(void *), GFP_KERNEL);
- if (!req->map_page)
- goto out;
+ req->map_page = kmalloc(srp_dev->max_pages_per_mr *
+ sizeof(void *), GFP_KERNEL);
+ if (!req->map_page)
+ goto out;
+ }
req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
if (!req->indirect_desc)
goto out;
@@ -1286,6 +1279,17 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
if (state->fmr.next >= state->fmr.end)
return -ENOMEM;
+ WARN_ON_ONCE(!dev->use_fmr);
+
+ if (state->npages == 0)
+ return 0;
+
+ if (state->npages == 1 && target->global_mr) {
+ srp_map_desc(state, state->base_dma_addr, state->dma_len,
+ target->global_mr->rkey);
+ goto reset_state;
+ }
+
fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
state->npages, io_addr);
if (IS_ERR(fmr))
@@ -1297,6 +1301,10 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
state->dma_len, fmr->fmr->rkey);
+reset_state:
+ state->npages = 0;
+ state->dma_len = 0;
+
return 0;
}
@@ -1306,13 +1314,26 @@ static int srp_map_finish_fr(struct srp_map_state *state,
struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_send_wr *bad_wr;
- struct ib_send_wr wr;
+ struct ib_reg_wr wr;
struct srp_fr_desc *desc;
u32 rkey;
+ int n, err;
if (state->fr.next >= state->fr.end)
return -ENOMEM;
+ WARN_ON_ONCE(!dev->use_fast_reg);
+
+ if (state->sg_nents == 0)
+ return 0;
+
+ if (state->sg_nents == 1 && target->global_mr) {
+ srp_map_desc(state, sg_dma_address(state->sg),
+ sg_dma_len(state->sg),
+ target->global_mr->rkey);
+ return 1;
+ }
+
desc = srp_fr_pool_get(ch->fr_pool);
if (!desc)
return -ENOMEM;
@@ -1320,56 +1341,33 @@ static int srp_map_finish_fr(struct srp_map_state *state,
rkey = ib_inc_rkey(desc->mr->rkey);
ib_update_fast_reg_key(desc->mr, rkey);
- memcpy(desc->frpl->page_list, state->pages,
- sizeof(state->pages[0]) * state->npages);
-
- memset(&wr, 0, sizeof(wr));
- wr.opcode = IB_WR_FAST_REG_MR;
- wr.wr_id = FAST_REG_WR_ID_MASK;
- wr.wr.fast_reg.iova_start = state->base_dma_addr;
- wr.wr.fast_reg.page_list = desc->frpl;
- wr.wr.fast_reg.page_list_len = state->npages;
- wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
- wr.wr.fast_reg.length = state->dma_len;
- wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE);
- wr.wr.fast_reg.rkey = desc->mr->lkey;
+ n = ib_map_mr_sg(desc->mr, state->sg, state->sg_nents,
+ dev->mr_page_size);
+ if (unlikely(n < 0))
+ return n;
+
+ wr.wr.next = NULL;
+ wr.wr.opcode = IB_WR_REG_MR;
+ wr.wr.wr_id = FAST_REG_WR_ID_MASK;
+ wr.wr.num_sge = 0;
+ wr.wr.send_flags = 0;
+ wr.mr = desc->mr;
+ wr.key = desc->mr->rkey;
+ wr.access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE);
*state->fr.next++ = desc;
state->nmdesc++;
- srp_map_desc(state, state->base_dma_addr, state->dma_len,
- desc->mr->rkey);
+ srp_map_desc(state, desc->mr->iova,
+ desc->mr->length, desc->mr->rkey);
- return ib_post_send(ch->qp, &wr, &bad_wr);
-}
+ err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
+ if (unlikely(err))
+ return err;
-static int srp_finish_mapping(struct srp_map_state *state,
- struct srp_rdma_ch *ch)
-{
- struct srp_target_port *target = ch->target;
- struct srp_device *dev = target->srp_host->srp_dev;
- int ret = 0;
-
- WARN_ON_ONCE(!dev->use_fast_reg && !dev->use_fmr);
-
- if (state->npages == 0)
- return 0;
-
- if (state->npages == 1 && target->global_mr)
- srp_map_desc(state, state->base_dma_addr, state->dma_len,
- target->global_mr->rkey);
- else
- ret = dev->use_fast_reg ? srp_map_finish_fr(state, ch) :
- srp_map_finish_fmr(state, ch);
-
- if (ret == 0) {
- state->npages = 0;
- state->dma_len = 0;
- }
-
- return ret;
+ return n;
}
static int srp_map_sg_entry(struct srp_map_state *state,
@@ -1389,7 +1387,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
while (dma_len) {
unsigned offset = dma_addr & ~dev->mr_page_mask;
if (state->npages == dev->max_pages_per_mr || offset != 0) {
- ret = srp_finish_mapping(state, ch);
+ ret = srp_map_finish_fmr(state, ch);
if (ret)
return ret;
}
@@ -1411,51 +1409,83 @@ static int srp_map_sg_entry(struct srp_map_state *state,
*/
ret = 0;
if (len != dev->mr_page_size)
- ret = srp_finish_mapping(state, ch);
+ ret = srp_map_finish_fmr(state, ch);
return ret;
}
-static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
- struct srp_request *req, struct scatterlist *scat,
- int count)
+static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
+ struct srp_request *req, struct scatterlist *scat,
+ int count)
{
- struct srp_target_port *target = ch->target;
- struct srp_device *dev = target->srp_host->srp_dev;
struct scatterlist *sg;
int i, ret;
- state->desc = req->indirect_desc;
- state->pages = req->map_page;
- if (dev->use_fast_reg) {
- state->fr.next = req->fr_list;
- state->fr.end = req->fr_list + target->cmd_sg_cnt;
- } else if (dev->use_fmr) {
- state->fmr.next = req->fmr_list;
- state->fmr.end = req->fmr_list + target->cmd_sg_cnt;
- }
+ state->desc = req->indirect_desc;
+ state->pages = req->map_page;
+ state->fmr.next = req->fmr_list;
+ state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
- if (dev->use_fast_reg || dev->use_fmr) {
- for_each_sg(scat, sg, count, i) {
- ret = srp_map_sg_entry(state, ch, sg, i);
- if (ret)
- goto out;
- }
- ret = srp_finish_mapping(state, ch);
+ for_each_sg(scat, sg, count, i) {
+ ret = srp_map_sg_entry(state, ch, sg, i);
if (ret)
- goto out;
- } else {
- for_each_sg(scat, sg, count, i) {
- srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
- ib_sg_dma_len(dev->dev, sg),
- target->global_mr->rkey);
- }
+ return ret;
}
+ ret = srp_map_finish_fmr(state, ch);
+ if (ret)
+ return ret;
+
req->nmdesc = state->nmdesc;
- ret = 0;
-out:
- return ret;
+ return 0;
+}
+
+static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
+ struct srp_request *req, struct scatterlist *scat,
+ int count)
+{
+ state->desc = req->indirect_desc;
+ state->fr.next = req->fr_list;
+ state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
+ state->sg = scat;
+ state->sg_nents = scsi_sg_count(req->scmnd);
+
+ while (state->sg_nents) {
+ int i, n;
+
+ n = srp_map_finish_fr(state, ch);
+ if (unlikely(n < 0))
+ return n;
+
+ state->sg_nents -= n;
+ for (i = 0; i < n; i++)
+ state->sg = sg_next(state->sg);
+ }
+
+ req->nmdesc = state->nmdesc;
+
+ return 0;
+}
+
+static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
+ struct srp_request *req, struct scatterlist *scat,
+ int count)
+{
+ struct srp_target_port *target = ch->target;
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct scatterlist *sg;
+ int i;
+
+ state->desc = req->indirect_desc;
+ for_each_sg(scat, sg, count, i) {
+ srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
+ ib_sg_dma_len(dev->dev, sg),
+ target->global_mr->rkey);
+ }
+
+ req->nmdesc = state->nmdesc;
+
+ return 0;
}
/*
@@ -1474,6 +1504,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
struct srp_map_state state;
struct srp_direct_buf idb_desc;
u64 idb_pages[1];
+ struct scatterlist idb_sg[1];
int ret;
memset(&state, 0, sizeof(state));
@@ -1481,20 +1512,32 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
state.gen.next = next_mr;
state.gen.end = end_mr;
state.desc = &idb_desc;
- state.pages = idb_pages;
- state.pages[0] = (req->indirect_dma_addr &
- dev->mr_page_mask);
- state.npages = 1;
state.base_dma_addr = req->indirect_dma_addr;
state.dma_len = idb_len;
- ret = srp_finish_mapping(&state, ch);
- if (ret < 0)
- goto out;
+
+ if (dev->use_fast_reg) {
+ state.sg = idb_sg;
+ state.sg_nents = 1;
+ sg_set_buf(idb_sg, req->indirect_desc, idb_len);
+ idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
+ ret = srp_map_finish_fr(&state, ch);
+ if (ret < 0)
+ return ret;
+ } else if (dev->use_fmr) {
+ state.pages = idb_pages;
+ state.pages[0] = (req->indirect_dma_addr &
+ dev->mr_page_mask);
+ state.npages = 1;
+ ret = srp_map_finish_fmr(&state, ch);
+ if (ret < 0)
+ return ret;
+ } else {
+ return -EINVAL;
+ }
*idb_rkey = idb_desc.key;
-out:
- return ret;
+ return 0;
}
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
@@ -1563,7 +1606,12 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
target->indirect_size, DMA_TO_DEVICE);
memset(&state, 0, sizeof(state));
- srp_map_sg(&state, ch, req, scat, count);
+ if (dev->use_fast_reg)
+ srp_map_sg_fr(&state, ch, req, scat, count);
+ else if (dev->use_fmr)
+ srp_map_sg_fmr(&state, ch, req, scat, count);
+ else
+ srp_map_sg_dma(&state, ch, req, scat, count);
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
@@ -3213,7 +3261,7 @@ static ssize_t srp_create_target(struct device *dev,
INIT_WORK(&target->tl_err_work, srp_tl_err_work);
INIT_WORK(&target->remove_work, srp_remove_work);
spin_lock_init(&target->lock);
- ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
+ ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
if (ret)
goto out;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 3608f2e4819c..87a2a919dc43 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -242,7 +242,6 @@ struct srp_iu {
struct srp_fr_desc {
struct list_head entry;
struct ib_mr *mr;
- struct ib_fast_reg_page_list *frpl;
};
/**
@@ -294,11 +293,17 @@ struct srp_map_state {
} gen;
};
struct srp_direct_buf *desc;
- u64 *pages;
+ union {
+ u64 *pages;
+ struct scatterlist *sg;
+ };
dma_addr_t base_dma_addr;
u32 dma_len;
u32 total_len;
- unsigned int npages;
+ union {
+ unsigned int npages;
+ int sg_nents;
+ };
unsigned int nmdesc;
unsigned int ndesc;
};
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index f6fe0414139b..47c4022fda76 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -546,7 +546,8 @@ static int srpt_refresh_port(struct srpt_port *sport)
sport->sm_lid = port_attr.sm_lid;
sport->lid = port_attr.lid;
- ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
+ ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid,
+ NULL);
if (ret)
goto err_query_port;
@@ -2822,7 +2823,7 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *ioctx)
{
- struct ib_send_wr wr;
+ struct ib_rdma_wr wr;
struct ib_send_wr *bad_wr;
struct rdma_iu *riu;
int i;
@@ -2850,29 +2851,29 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
for (i = 0; i < n_rdma; ++i, ++riu) {
if (dir == DMA_FROM_DEVICE) {
- wr.opcode = IB_WR_RDMA_WRITE;
- wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
+ wr.wr.opcode = IB_WR_RDMA_WRITE;
+ wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
SRPT_RDMA_WRITE_LAST :
SRPT_RDMA_MID,
ioctx->ioctx.index);
} else {
- wr.opcode = IB_WR_RDMA_READ;
- wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
+ wr.wr.opcode = IB_WR_RDMA_READ;
+ wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
SRPT_RDMA_READ_LAST :
SRPT_RDMA_MID,
ioctx->ioctx.index);
}
- wr.next = NULL;
- wr.wr.rdma.remote_addr = riu->raddr;
- wr.wr.rdma.rkey = riu->rkey;
- wr.num_sge = riu->sge_cnt;
- wr.sg_list = riu->sge;
+ wr.wr.next = NULL;
+ wr.remote_addr = riu->raddr;
+ wr.rkey = riu->rkey;
+ wr.wr.num_sge = riu->sge_cnt;
+ wr.wr.sg_list = riu->sge;
/* only get completion event for the last rdma write */
if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
- wr.send_flags = IB_SEND_SIGNALED;
+ wr.wr.send_flags = IB_SEND_SIGNALED;
- ret = ib_post_send(ch->qp, &wr, &bad_wr);
+ ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
if (ret)
break;
}
@@ -2881,11 +2882,11 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
__func__, __LINE__, ret, i, n_rdma);
if (ret && i > 0) {
- wr.num_sge = 0;
- wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
- wr.send_flags = IB_SEND_SIGNALED;
+ wr.wr.num_sge = 0;
+ wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
+ wr.wr.send_flags = IB_SEND_SIGNALED;
while (ch->state == CH_LIVE &&
- ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
+ ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
pr_info("Trying to abort failed RDMA transfer [%d]\n",
ioctx->ioctx.index);
msleep(1000);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 414fe7c487d5..55a47de544ea 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -49,6 +49,7 @@
#include <linux/etherdevice.h>
#include <linux/net_tstamp.h>
#include <asm/io.h>
+#include "t4_chip_type.h"
#include "cxgb4_uld.h"
#define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
@@ -291,31 +292,6 @@ struct pci_params {
unsigned char width;
};
-#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision))
-#define CHELSIO_CHIP_FPGA 0x100
-#define CHELSIO_CHIP_VERSION(code) (((code) >> 4) & 0xf)
-#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf)
-
-#define CHELSIO_T4 0x4
-#define CHELSIO_T5 0x5
-#define CHELSIO_T6 0x6
-
-enum chip_type {
- T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1),
- T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2),
- T4_FIRST_REV = T4_A1,
- T4_LAST_REV = T4_A2,
-
- T5_A0 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0),
- T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 1),
- T5_FIRST_REV = T5_A0,
- T5_LAST_REV = T5_A1,
-
- T6_A0 = CHELSIO_CHIP_CODE(CHELSIO_T6, 0),
- T6_FIRST_REV = T6_A0,
- T6_LAST_REV = T6_A0,
-};
-
struct devlog_params {
u32 memtype; /* which memory (EDC0, EDC1, MC) */
u32 start; /* start of log in firmware memory */
@@ -909,21 +885,6 @@ static inline int is_offload(const struct adapter *adap)
return adap->params.offload;
}
-static inline int is_t6(enum chip_type chip)
-{
- return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T6;
-}
-
-static inline int is_t5(enum chip_type chip)
-{
- return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T5;
-}
-
-static inline int is_t4(enum chip_type chip)
-{
- return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4;
-}
-
static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
{
return readl(adap->regs + reg_addr);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 2cf81857a297..0d147610a06f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1941,6 +1941,28 @@ unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
EXPORT_SYMBOL(cxgb4_best_aligned_mtu);
/**
+ * cxgb4_tp_smt_idx - Get the Source Mac Table index for this VI
+ * @chip: chip type
+ * @viid: VI id of the given port
+ *
+ * Return the SMT index for this VI.
+ */
+unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid)
+{
+ /* In T4/T5, SMT contains 256 SMAC entries organized in
+ * 128 rows of 2 entries each.
+ * In T6, SMT contains 256 SMAC entries in 256 rows.
+ * TODO: The below code needs to be updated when we add support
+ * for 256 VFs.
+ */
+ if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)
+ return ((viid & 0x7f) << 1);
+ else
+ return (viid & 0x7f);
+}
+EXPORT_SYMBOL(cxgb4_tp_smt_idx);
+
+/**
* cxgb4_port_chan - get the HW channel of a port
* @dev: the net device for the port
*
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index c3a8be5541e7..cf711d5f15be 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -40,6 +40,7 @@
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/atomic.h>
+#include "cxgb4.h"
/* CPL message priority levels */
enum {
@@ -290,6 +291,7 @@ int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
unsigned int cxgb4_port_chan(const struct net_device *dev);
unsigned int cxgb4_port_viid(const struct net_device *dev);
+unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid);
unsigned int cxgb4_port_idx(const struct net_device *dev);
unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
unsigned int *idx);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h
new file mode 100644
index 000000000000..54b718111e3f
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h
@@ -0,0 +1,85 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2003-2015 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __T4_CHIP_TYPE_H__
+#define __T4_CHIP_TYPE_H__
+
+#define CHELSIO_T4 0x4
+#define CHELSIO_T5 0x5
+#define CHELSIO_T6 0x6
+
+/* We code the Chelsio T4 Family "Chip Code" as a tuple:
+ *
+ * (Chip Version, Chip Revision)
+ *
+ * where:
+ *
+ * Chip Version: is T4, T5, etc.
+ * Chip Revision: is the FAB "spin" of the Chip Version.
+ */
+#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision))
+#define CHELSIO_CHIP_VERSION(code) (((code) >> 4) & 0xf)
+#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf)
+
+enum chip_type {
+ T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1),
+ T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2),
+ T4_FIRST_REV = T4_A1,
+ T4_LAST_REV = T4_A2,
+
+ T5_A0 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0),
+ T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 1),
+ T5_FIRST_REV = T5_A0,
+ T5_LAST_REV = T5_A1,
+
+ T6_A0 = CHELSIO_CHIP_CODE(CHELSIO_T6, 0),
+ T6_FIRST_REV = T6_A0,
+ T6_LAST_REV = T6_A0,
+};
+
+static inline int is_t4(enum chip_type chip)
+{
+ return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4);
+}
+
+static inline int is_t5(enum chip_type chip)
+{
+ return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T5);
+}
+
+static inline int is_t6(enum chip_type chip)
+{
+ return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T6);
+}
+
+#endif /* __T4_CHIP_TYPE_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index b99144afd4ec..a072d341e205 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -417,6 +417,21 @@ struct cpl_t5_act_open_req {
__be64 params;
};
+struct cpl_t6_act_open_req {
+ WR_HDR;
+ union opcode_tid ot;
+ __be16 local_port;
+ __be16 peer_port;
+ __be32 local_ip;
+ __be32 peer_ip;
+ __be64 opt0;
+ __be32 rsvd;
+ __be32 opt2;
+ __be64 params;
+ __be32 rsvd2;
+ __be32 opt3;
+};
+
struct cpl_act_open_req6 {
WR_HDR;
union opcode_tid ot;
@@ -446,6 +461,23 @@ struct cpl_t5_act_open_req6 {
__be64 params;
};
+struct cpl_t6_act_open_req6 {
+ WR_HDR;
+ union opcode_tid ot;
+ __be16 local_port;
+ __be16 peer_port;
+ __be64 local_ip_hi;
+ __be64 local_ip_lo;
+ __be64 peer_ip_hi;
+ __be64 peer_ip_lo;
+ __be64 opt0;
+ __be32 rsvd;
+ __be32 opt2;
+ __be64 params;
+ __be32 rsvd2;
+ __be32 opt3;
+};
+
struct cpl_act_open_rpl {
union opcode_tid ot;
__be32 atid_status;
@@ -504,6 +536,19 @@ struct cpl_pass_establish {
#define TCPOPT_MSS_M 0xF
#define TCPOPT_MSS_G(x) (((x) >> TCPOPT_MSS_S) & TCPOPT_MSS_M)
+#define T6_TCP_HDR_LEN_S 8
+#define T6_TCP_HDR_LEN_V(x) ((x) << T6_TCP_HDR_LEN_S)
+#define T6_TCP_HDR_LEN_G(x) (((x) >> T6_TCP_HDR_LEN_S) & TCP_HDR_LEN_M)
+
+#define T6_IP_HDR_LEN_S 14
+#define T6_IP_HDR_LEN_V(x) ((x) << T6_IP_HDR_LEN_S)
+#define T6_IP_HDR_LEN_G(x) (((x) >> T6_IP_HDR_LEN_S) & IP_HDR_LEN_M)
+
+#define T6_ETH_HDR_LEN_S 24
+#define T6_ETH_HDR_LEN_M 0xFF
+#define T6_ETH_HDR_LEN_V(x) ((x) << T6_ETH_HDR_LEN_S)
+#define T6_ETH_HDR_LEN_G(x) (((x) >> T6_ETH_HDR_LEN_S) & T6_ETH_HDR_LEN_M)
+
struct cpl_act_establish {
union opcode_tid ot;
__be32 rsvd;
@@ -833,6 +878,9 @@ struct cpl_rx_pkt {
__be16 err_vec;
};
+#define RX_T6_ETHHDR_LEN_M 0xFF
+#define RX_T6_ETHHDR_LEN_G(x) (((x) >> RX_ETHHDR_LEN_S) & RX_T6_ETHHDR_LEN_M)
+
#define RXF_PSH_S 20
#define RXF_PSH_V(x) ((x) << RXF_PSH_S)
#define RXF_PSH_F RXF_PSH_V(1U)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index a946e4bf71d2..005f910ec955 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -123,6 +123,28 @@ void mlx4_en_update_loopback_state(struct net_device *dev,
*/
if (mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback)
priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK;
+
+ mutex_lock(&priv->mdev->state_lock);
+ if (priv->mdev->dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB &&
+ priv->rss_map.indir_qp.qpn) {
+ int i;
+ int err = 0;
+ int loopback = !!(features & NETIF_F_LOOPBACK);
+
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ int ret;
+
+ ret = mlx4_en_change_mcast_lb(priv,
+ &priv->rss_map.qps[i],
+ loopback);
+ if (!err)
+ err = ret;
+ }
+ if (err)
+ mlx4_warn(priv->mdev, "failed to change mcast loopback\n");
+ }
+ mutex_unlock(&priv->mdev->state_lock);
}
static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index e482fa1bb741..12aab5a659d3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -69,6 +69,15 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
context->pri_path.counter_index = priv->counter_index;
context->cqn_send = cpu_to_be32(cqn);
context->cqn_recv = cpu_to_be32(cqn);
+ if (!rss &&
+ (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) &&
+ context->pri_path.counter_index !=
+ MLX4_SINK_COUNTER_INDEX(mdev->dev)) {
+ /* disable multicast loopback to qp with same counter */
+ if (!(dev->features & NETIF_F_LOOPBACK))
+ context->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB;
+ context->pri_path.control |= MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+ }
context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2);
if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX))
context->param3 |= cpu_to_be32(1 << 30);
@@ -80,6 +89,22 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
}
}
+int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
+ int loopback)
+{
+ int ret;
+ struct mlx4_update_qp_params qp_params;
+
+ memset(&qp_params, 0, sizeof(qp_params));
+ if (!loopback)
+ qp_params.flags = MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB;
+
+ ret = mlx4_update_qp(priv->mdev->dev, qp->qpn,
+ MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB,
+ &qp_params);
+
+ return ret;
+}
int mlx4_en_map_buffer(struct mlx4_buf *buf)
{
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index f13a4d7bbf95..90db94e83fde 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -155,6 +155,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[27] = "Port beacon support",
[28] = "RX-ALL support",
[29] = "802.1ad offload support",
+ [31] = "Modifying loopback source checks using UPDATE_QP support",
+ [32] = "Loopback source checks support",
};
int i;
@@ -964,6 +966,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
if (field32 & (1 << 16))
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
+ if (field32 & (1 << 18))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB;
+ if (field32 & (1 << 19))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK;
if (field32 & (1 << 26))
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL;
if (field32 & (1 << 20))
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index defcf8c395bf..c41f15102ae0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -798,7 +798,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
int mlx4_en_map_buffer(struct mlx4_buf *buf);
void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
-
+int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
+ int loopback);
void mlx4_en_calc_rx_buf(struct net_device *dev);
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 3311f35d08e0..168823dde79f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -436,6 +436,23 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
cmd->qp_context.pri_path.grh_mylmc = params->smac_index;
}
+ if (attr & MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB) {
+ if (!(dev->caps.flags2
+ & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
+ mlx4_warn(dev,
+ "Trying to set src check LB, but it isn't supported\n");
+ err = -ENOTSUPP;
+ goto out;
+ }
+ pri_addr_path_mask |=
+ 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB;
+ if (params->flags &
+ MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB) {
+ cmd->qp_context.pri_path.fl |=
+ MLX4_FL_ETH_SRC_CHECK_MC_LB;
+ }
+ }
+
if (attr & MLX4_UPDATE_QP_VSD) {
qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD;
if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE)
@@ -458,7 +475,7 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0,
MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_NATIVE);
-
+out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index ac4b99ab1f85..9813d34f3e5b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -770,9 +770,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
}
}
+ /* preserve IF_COUNTER flag */
+ qpc->pri_path.vlan_control &=
+ MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE &&
dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) {
- qpc->pri_path.vlan_control =
+ qpc->pri_path.vlan_control |=
MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED |
@@ -780,12 +783,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED |
MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
} else if (0 != vp_oper->state.default_vlan) {
- qpc->pri_path.vlan_control =
+ qpc->pri_path.vlan_control |=
MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
} else { /* priority tagged */
- qpc->pri_path.vlan_control =
+ qpc->pri_path.vlan_control |=
MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
}
@@ -3764,9 +3767,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
update_gid(dev, inbox, (u8)slave);
adjust_proxy_tun_qkey(dev, vhcr, qpc);
orig_sched_queue = qpc->pri_path.sched_queue;
- err = update_vport_qp_param(dev, inbox, slave, qpn);
- if (err)
- return err;
err = get_res(dev, slave, qpn, RES_QP, &qp);
if (err)
@@ -3776,6 +3776,10 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
goto out;
}
+ err = update_vport_qp_param(dev, inbox, slave, qpn);
+ if (err)
+ goto out;
+
err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
out:
/* if no error, save sched queue value passed in by VF. This is
@@ -4210,7 +4214,9 @@ static int add_eth_header(struct mlx4_dev *dev, int slave,
}
-#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX)
+#define MLX4_UPD_QP_PATH_MASK_SUPPORTED ( \
+ 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX |\
+ 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)
int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
@@ -4233,6 +4239,16 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
(pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED))
return -EPERM;
+ if ((pri_addr_path_mask &
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) &&
+ !(dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
+ mlx4_warn(dev,
+ "Src check LB for slave %d isn't supported\n",
+ slave);
+ return -ENOTSUPP;
+ }
+
/* Just change the smac for the QP */
err = get_res(dev, slave, qpn, RES_QP, &rqp);
if (err) {
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 5f78b42b427a..263db37de7c8 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -123,7 +123,9 @@ extern kib_tunables_t kiblnd_tunables;
IBLND_CREDIT_HIGHWATER_V1 : \
*kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
-#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt)
+#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \
+ cb, dev, \
+ ps, qpt)
static inline int
kiblnd_concurrent_sends_v1(void)
@@ -504,7 +506,7 @@ typedef struct kib_tx /* transmit message */
__u64 tx_msgaddr; /* message buffer (I/O addr) */
DECLARE_PCI_UNMAP_ADDR(tx_msgunmap); /* for dma_unmap_single() */
int tx_nwrq; /* # send work items */
- struct ib_send_wr *tx_wrq; /* send work items... */
+ struct ib_rdma_wr *tx_wrq; /* send work items... */
struct ib_sge *tx_sge; /* ...and their memory */
kib_rdma_desc_t *tx_rd; /* rdma descriptor */
int tx_nfrags; /* # entries in... */
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 8989e36091fb..260750354a41 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -838,7 +838,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
/* close_conn will launch failover */
rc = -ENETDOWN;
} else {
- rc = ib_post_send(conn->ibc_cmid->qp, tx->tx_wrq, &bad_wrq);
+ rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &bad_wrq);
}
conn->ibc_last_send = jiffies;
@@ -1012,7 +1012,7 @@ kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob)
{
kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev;
struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
- struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
+ struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
int nob = offsetof(kib_msg_t, ibm_u) + body_nob;
struct ib_mr *mr;
@@ -1031,12 +1031,12 @@ kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob)
memset(wrq, 0, sizeof(*wrq));
- wrq->next = NULL;
- wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
- wrq->sg_list = sge;
- wrq->num_sge = 1;
- wrq->opcode = IB_WR_SEND;
- wrq->send_flags = IB_SEND_SIGNALED;
+ wrq->wr.next = NULL;
+ wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
+ wrq->wr.sg_list = sge;
+ wrq->wr.num_sge = 1;
+ wrq->wr.opcode = IB_WR_SEND;
+ wrq->wr.send_flags = IB_SEND_SIGNALED;
tx->tx_nwrq++;
}
@@ -1048,7 +1048,7 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
kib_msg_t *ibmsg = tx->tx_msg;
kib_rdma_desc_t *srcrd = tx->tx_rd;
struct ib_sge *sge = &tx->tx_sge[0];
- struct ib_send_wr *wrq = &tx->tx_wrq[0];
+ struct ib_rdma_wr *wrq = &tx->tx_wrq[0], *next;
int rc = resid;
int srcidx;
int dstidx;
@@ -1094,16 +1094,17 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
sge->length = wrknob;
wrq = &tx->tx_wrq[tx->tx_nwrq];
+ next = wrq + 1;
- wrq->next = wrq + 1;
- wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
- wrq->sg_list = sge;
- wrq->num_sge = 1;
- wrq->opcode = IB_WR_RDMA_WRITE;
- wrq->send_flags = 0;
+ wrq->wr.next = &next->wr;
+ wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
+ wrq->wr.sg_list = sge;
+ wrq->wr.num_sge = 1;
+ wrq->wr.opcode = IB_WR_RDMA_WRITE;
+ wrq->wr.send_flags = 0;
- wrq->wr.rdma.remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
- wrq->wr.rdma.rkey = kiblnd_rd_frag_key(dstrd, dstidx);
+ wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
+ wrq->rkey = kiblnd_rd_frag_key(dstrd, dstidx);
srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob);
dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob);
diff --git a/drivers/staging/rdma/amso1100/c2_qp.c b/drivers/staging/rdma/amso1100/c2_qp.c
index e0a7aff0eb2a..ca364dbe369c 100644
--- a/drivers/staging/rdma/amso1100/c2_qp.c
+++ b/drivers/staging/rdma/amso1100/c2_qp.c
@@ -860,9 +860,9 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
flags |= SQ_READ_FENCE;
}
wr.sqwr.rdma_write.remote_stag =
- cpu_to_be32(ib_wr->wr.rdma.rkey);
+ cpu_to_be32(rdma_wr(ib_wr)->rkey);
wr.sqwr.rdma_write.remote_to =
- cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+ cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
err = move_sgl((struct c2_data_addr *)
& (wr.sqwr.rdma_write.data),
ib_wr->sg_list,
@@ -889,9 +889,9 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
wr.sqwr.rdma_read.local_to =
cpu_to_be64(ib_wr->sg_list->addr);
wr.sqwr.rdma_read.remote_stag =
- cpu_to_be32(ib_wr->wr.rdma.rkey);
+ cpu_to_be32(rdma_wr(ib_wr)->rkey);
wr.sqwr.rdma_read.remote_to =
- cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+ cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
wr.sqwr.rdma_read.length =
cpu_to_be32(ib_wr->sg_list->length);
break;
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
index 47f94984353d..10e2074384f5 100644
--- a/drivers/staging/rdma/ehca/ehca_reqs.c
+++ b/drivers/staging/rdma/ehca/ehca_reqs.c
@@ -110,19 +110,19 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
/* need ib_mad struct */
#include <rdma/ib_mad.h>
-static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+static void trace_ud_wr(const struct ib_ud_wr *ud_wr)
{
int idx;
int j;
- while (send_wr) {
- struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
- struct ib_sge *sge = send_wr->sg_list;
- ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
- "send_flags=%x opcode=%x", idx, send_wr->wr_id,
- send_wr->num_sge, send_wr->send_flags,
- send_wr->opcode);
+ while (ud_wr) {
+ struct ib_mad_hdr *mad_hdr = ud_wrmad_hdr;
+ struct ib_sge *sge = ud_wr->wr.sg_list;
+ ehca_gen_dbg("ud_wr#%x wr_id=%lx num_sge=%x "
+ "send_flags=%x opcode=%x", idx, ud_wr->wr.wr_id,
+ ud_wr->wr.num_sge, ud_wr->wr.send_flags,
+ ud_wr->.wr.opcode);
if (mad_hdr) {
- ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
+ ehca_gen_dbg("ud_wr#%x mad_hdr base_version=%x "
"mgmt_class=%x class_version=%x method=%x "
"status=%x class_specific=%x tid=%lx "
"attr_id=%x resv=%x attr_mod=%x",
@@ -134,33 +134,33 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
mad_hdr->resv,
mad_hdr->attr_mod);
}
- for (j = 0; j < send_wr->num_sge; j++) {
+ for (j = 0; j < ud_wr->wr.num_sge; j++) {
u8 *data = __va(sge->addr);
- ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
+ ehca_gen_dbg("ud_wr#%x sge#%x addr=%p length=%x "
"lkey=%x",
idx, j, data, sge->length, sge->lkey);
/* assume length is n*16 */
- ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
+ ehca_dmp(data, sge->length, "ud_wr#%x sge#%x",
idx, j);
sge++;
} /* eof for j */
idx++;
- send_wr = send_wr->next;
- } /* eof while send_wr */
+ ud_wr = ud_wr(ud_wr->wr.next);
+ } /* eof while ud_wr */
}
#endif /* DEBUG_GSI_SEND_WR */
static inline int ehca_write_swqe(struct ehca_qp *qp,
struct ehca_wqe *wqe_p,
- const struct ib_send_wr *send_wr,
+ struct ib_send_wr *send_wr,
u32 sq_map_idx,
int hidden)
{
u32 idx;
u64 dma_length;
struct ehca_av *my_av;
- u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+ u32 remote_qkey;
struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
if (unlikely((send_wr->num_sge < 0) ||
@@ -223,20 +223,21 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
/* no break is intential here */
case IB_QPT_UD:
/* IB 1.2 spec C10-15 compliance */
- if (send_wr->wr.ud.remote_qkey & 0x80000000)
+ remote_qkey = ud_wr(send_wr)->remote_qkey;
+ if (remote_qkey & 0x80000000)
remote_qkey = qp->qkey;
- wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+ wqe_p->destination_qp_number = ud_wr(send_wr)->remote_qpn << 8;
wqe_p->local_ee_context_qkey = remote_qkey;
- if (unlikely(!send_wr->wr.ud.ah)) {
- ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+ if (unlikely(!ud_wr(send_wr)->ah)) {
+ ehca_gen_err("ud_wr(send_wr) is NULL. qp=%p", qp);
return -EINVAL;
}
- if (unlikely(send_wr->wr.ud.remote_qpn == 0)) {
+ if (unlikely(ud_wr(send_wr)->remote_qpn == 0)) {
ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
return -EINVAL;
}
- my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
+ my_av = container_of(ud_wr(send_wr)->ah, struct ehca_av, ib_ah);
wqe_p->u.ud_av.ud_av = my_av->av;
/*
@@ -255,9 +256,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
qp->qp_type == IB_QPT_GSI)
wqe_p->u.ud_av.ud_av.pmtu = 1;
if (qp->qp_type == IB_QPT_GSI) {
- wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
+ wqe_p->pkeyi = ud_wr(send_wr)->pkey_index;
#ifdef DEBUG_GSI_SEND_WR
- trace_send_wr_ud(send_wr);
+ trace_ud_wr(ud_wr(send_wr));
#endif /* DEBUG_GSI_SEND_WR */
}
break;
@@ -269,8 +270,8 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
case IB_QPT_RC:
/* TODO: atomic not implemented */
wqe_p->u.nud.remote_virtual_address =
- send_wr->wr.rdma.remote_addr;
- wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
+ rdma_wr(send_wr)->remote_addr;
+ wqe_p->u.nud.rkey = rdma_wr(send_wr)->rkey;
/*
* omitted checking of IB_SEND_INLINE
diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c
index f6eff177ace1..cb4e6087dfdb 100644
--- a/drivers/staging/rdma/hfi1/keys.c
+++ b/drivers/staging/rdma/hfi1/keys.c
@@ -354,58 +354,3 @@ bail:
rcu_read_unlock();
return 0;
}
-
-/*
- * Initialize the memory region specified by the work request.
- */
-int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr)
-{
- struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
- struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
- struct hfi1_mregion *mr;
- u32 rkey = wr->wr.fast_reg.rkey;
- unsigned i, n, m;
- int ret = -EINVAL;
- unsigned long flags;
- u64 *page_list;
- size_t ps;
-
- spin_lock_irqsave(&rkt->lock, flags);
- if (pd->user || rkey == 0)
- goto bail;
-
- mr = rcu_dereference_protected(
- rkt->table[(rkey >> (32 - hfi1_lkey_table_size))],
- lockdep_is_held(&rkt->lock));
- if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
- goto bail;
-
- if (wr->wr.fast_reg.page_list_len > mr->max_segs)
- goto bail;
-
- ps = 1UL << wr->wr.fast_reg.page_shift;
- if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
- goto bail;
-
- mr->user_base = wr->wr.fast_reg.iova_start;
- mr->iova = wr->wr.fast_reg.iova_start;
- mr->lkey = rkey;
- mr->length = wr->wr.fast_reg.length;
- mr->access_flags = wr->wr.fast_reg.access_flags;
- page_list = wr->wr.fast_reg.page_list->page_list;
- m = 0;
- n = 0;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
- mr->map[m]->segs[n].vaddr = (void *) page_list[i];
- mr->map[m]->segs[n].length = ps;
- if (++n == HFI1_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = 0;
-bail:
- spin_unlock_irqrestore(&rkt->lock, flags);
- return ret;
-}
diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c
index 0208fc200c1a..568f185a022d 100644
--- a/drivers/staging/rdma/hfi1/mr.c
+++ b/drivers/staging/rdma/hfi1/mr.c
@@ -344,9 +344,10 @@ out:
/*
* Allocate a memory region usable with the
- * IB_WR_FAST_REG_MR send work request.
+ * IB_WR_REG_MR send work request.
*
* Return the memory region on success, otherwise return an errno.
+ * FIXME: IB_WR_REG_MR is not supported
*/
struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
@@ -364,36 +365,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
return &mr->ibmr;
}
-struct ib_fast_reg_page_list *
-hfi1_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
-{
- unsigned size = page_list_len * sizeof(u64);
- struct ib_fast_reg_page_list *pl;
-
- if (size > PAGE_SIZE)
- return ERR_PTR(-EINVAL);
-
- pl = kzalloc(sizeof(*pl), GFP_KERNEL);
- if (!pl)
- return ERR_PTR(-ENOMEM);
-
- pl->page_list = kzalloc(size, GFP_KERNEL);
- if (!pl->page_list)
- goto err_free;
-
- return pl;
-
-err_free:
- kfree(pl);
- return ERR_PTR(-ENOMEM);
-}
-
-void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl)
-{
- kfree(pl->page_list);
- kfree(pl);
-}
-
/**
* hfi1_alloc_fmr - allocate a fast memory region
* @pd: the protection domain for this memory region
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index df1fa56eaf85..f8c36166962f 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c
@@ -422,7 +422,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+ atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
}
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c
index 0b19206ff33e..5fc93bb312f1 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/staging/rdma/hfi1/rc.c
@@ -404,9 +404,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
goto bail;
}
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
@@ -455,9 +455,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
wqe->lpsn = qp->s_next_psn++;
}
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -488,21 +488,21 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.swap);
+ wqe->atomic_wr.swap);
ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
} else {
qp->s_state = OP(FETCH_ADD);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
ohdr->u.atomic_eth.compare_data = 0;
}
ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr >> 32);
+ wqe->atomic_wr.remote_addr >> 32);
ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr);
+ wqe->atomic_wr.remote_addr);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
- wqe->wr.wr.atomic.rkey);
+ wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
@@ -629,9 +629,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
*/
len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+ cpu_to_be64(wqe->rdma_wr.remote_addr + len);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c
index 8614b070545c..49bc9fd7a51a 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/staging/rdma/hfi1/ruc.c
@@ -481,8 +481,8 @@ again:
if (wqe->length == 0)
break;
if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
qp->r_sge.sg_list = NULL;
@@ -494,8 +494,8 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
release = 0;
@@ -512,18 +512,18 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->wr.wr.atomic.remote_addr,
- wqe->wr.wr.atomic.rkey,
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = wqe->wr.wr.atomic.compare_add;
+ sdata = wqe->atomic_wr.compare_add;
*(u64 *) sqp->s_sge.sge.vaddr =
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- sdata, wqe->wr.wr.atomic.swap);
+ sdata, wqe->atomic_wr.swap);
hfi1_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
@@ -912,7 +912,7 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+ atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c
index b536f397737c..6095039c4485 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/staging/rdma/hfi1/uc.c
@@ -147,9 +147,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / 4;
if (len > pmtu) {
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
index d40d1a1e10aa..5a9c784bec04 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/staging/rdma/hfi1/ud.c
@@ -80,7 +80,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
rcu_read_lock();
- qp = hfi1_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+ qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
rcu_read_unlock();
@@ -98,7 +98,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
goto drop;
}
- ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
@@ -128,8 +128,8 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
if (qp->ibqp.qp_num) {
u32 qkey;
- qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
- sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+ qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
+ sqp->qkey : swqe->ud_wr.remote_qkey;
if (unlikely(qkey != qp->qkey)) {
u16 lid;
@@ -234,7 +234,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
if (sqp->ibqp.qp_type == IB_QPT_GSI ||
sqp->ibqp.qp_type == IB_QPT_SMI)
- wc.pkey_index = swqe->wr.wr.ud.pkey_index;
+ wc.pkey_index = swqe->ud_wr.pkey_index;
else
wc.pkey_index = sqp->s_pkey_index;
} else {
@@ -309,7 +309,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE ||
ah_attr->dlid == HFI1_PERMISSIVE_LID) {
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
@@ -401,18 +401,18 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
bth0 |= IB_BTH_SOLICITED;
bth0 |= extra_bytes << 20;
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
- bth0 |= hfi1_get_pkey(ibp, wqe->wr.wr.ud.pkey_index);
+ bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
else
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+ ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++));
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
- qp->qkey : wqe->wr.wr.ud.remote_qkey);
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+ qp->qkey : wqe->ud_wr.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
/* disarm any ahg */
qp->s_hdr->ahgcount = 0;
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index a13a2b135365..9beb0aa876f0 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -383,9 +383,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
* undefined operations.
* Make sure buffer is large enough to hold the result for atomics.
*/
- if (wr->opcode == IB_WR_FAST_REG_MR) {
- return -EINVAL;
- } else if (qp->ibqp.qp_type == IB_QPT_UC) {
+ if (qp->ibqp.qp_type == IB_QPT_UC) {
if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
return -EINVAL;
} else if (qp->ibqp.qp_type != IB_QPT_RC) {
@@ -394,7 +392,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
wr->opcode != IB_WR_SEND_WITH_IMM)
return -EINVAL;
/* Check UD destination address PD */
- if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
return -EINVAL;
} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
return -EINVAL;
@@ -415,7 +413,21 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
rkt = &to_idev(qp->ibqp.device)->lk_table;
pd = to_ipd(qp->ibqp.pd);
wqe = get_swqe_ptr(qp, qp->s_head);
- wqe->wr = *wr;
+
+
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+ else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE ||
+ wr->opcode == IB_WR_RDMA_READ)
+ memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+ else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+ else
+ memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
wqe->length = 0;
j = 0;
if (wr->num_sge) {
@@ -441,7 +453,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
if (wqe->length > 0x80000000U)
goto bail_inval_free;
} else {
- struct hfi1_ah *ah = to_iah(wr->wr.ud.ah);
+ struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah);
atomic_inc(&ah->refcount);
}
@@ -2055,8 +2067,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->reg_user_mr = hfi1_reg_user_mr;
ibdev->dereg_mr = hfi1_dereg_mr;
ibdev->alloc_mr = hfi1_alloc_mr;
- ibdev->alloc_fast_reg_page_list = hfi1_alloc_fast_reg_page_list;
- ibdev->free_fast_reg_page_list = hfi1_free_fast_reg_page_list;
ibdev->alloc_fmr = hfi1_alloc_fmr;
ibdev->map_phys_fmr = hfi1_map_phys_fmr;
ibdev->unmap_fmr = hfi1_unmap_fmr;
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index e4a8a0d4ccf8..041ad07ee699 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -348,7 +348,12 @@ struct hfi1_mr {
* in qp->s_max_sge.
*/
struct hfi1_swqe {
- struct ib_send_wr wr; /* don't use wr.sg_list */
+ union {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ struct ib_rdma_wr rdma_wr;
+ struct ib_atomic_wr atomic_wr;
+ struct ib_ud_wr ud_wr;
+ };
u32 psn; /* first packet sequence number */
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */
@@ -1021,13 +1026,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_entries);
-struct ib_fast_reg_page_list *hfi1_alloc_fast_reg_page_list(
- struct ib_device *ibdev, int page_list_len);
-
-void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
-
-int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr);
-
struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c
index 79b3dbc97179..d4aa53574e57 100644
--- a/drivers/staging/rdma/ipath/ipath_rc.c
+++ b/drivers/staging/rdma/ipath/ipath_rc.c
@@ -350,9 +350,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
goto bail;
}
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
@@ -401,9 +401,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
wqe->lpsn = qp->s_next_psn++;
}
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -433,21 +433,21 @@ int ipath_make_rc_req(struct ipath_qp *qp)
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.swap);
+ wqe->atomic_wr.swap);
ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
} else {
qp->s_state = OP(FETCH_ADD);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
ohdr->u.atomic_eth.compare_data = 0;
}
ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr >> 32);
+ wqe->atomic_wr.remote_addr >> 32);
ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr);
+ wqe->atomic_wr.remote_addr);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
- wqe->wr.wr.atomic.rkey);
+ wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
@@ -567,9 +567,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_init_restart(qp, wqe);
len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+ cpu_to_be64(wqe->rdma_wr.remote_addr + len);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c
index 2296832f94da..e541a01f1f61 100644
--- a/drivers/staging/rdma/ipath/ipath_ruc.c
+++ b/drivers/staging/rdma/ipath/ipath_ruc.c
@@ -352,8 +352,8 @@ again:
if (wqe->length == 0)
break;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
break;
@@ -362,8 +362,8 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
qp->r_sge.sge = wqe->sg_list[0];
@@ -376,18 +376,18 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
- wqe->wr.wr.atomic.remote_addr,
- wqe->wr.wr.atomic.rkey,
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = wqe->wr.wr.atomic.compare_add;
+ sdata = wqe->atomic_wr.compare_add;
*(u64 *) sqp->s_sge.sge.vaddr =
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- sdata, wqe->wr.wr.atomic.swap);
+ sdata, wqe->atomic_wr.swap);
goto send_comp;
default:
diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c
index 22e60998f1a7..0246b30280b9 100644
--- a/drivers/staging/rdma/ipath/ipath_uc.c
+++ b/drivers/staging/rdma/ipath/ipath_uc.c
@@ -126,9 +126,9 @@ int ipath_make_uc_req(struct ipath_qp *qp)
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / 4;
if (len > pmtu) {
diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c
index 33fcfe206bc9..385d9410a51e 100644
--- a/drivers/staging/rdma/ipath/ipath_ud.c
+++ b/drivers/staging/rdma/ipath/ipath_ud.c
@@ -64,7 +64,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
u32 rlen;
u32 length;
- qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
+ qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
dev->n_pkt_drops++;
goto done;
@@ -76,8 +76,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
* qkey from the QP context instead of the WR (see 10.2.5).
*/
if (unlikely(qp->ibqp.qp_num &&
- ((int) swqe->wr.wr.ud.remote_qkey < 0 ?
- sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) {
+ ((int) swqe->ud_wr.remote_qkey < 0 ?
+ sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
/* XXX OK to lose a count once in a while. */
dev->qkey_violations++;
dev->n_pkt_drops++;
@@ -174,7 +174,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
} else
spin_unlock_irqrestore(&rq->lock, flags);
- ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
if (ah_attr->ah_flags & IB_AH_GRH) {
ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
wc.wc_flags |= IB_WC_GRH;
@@ -224,7 +224,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
- swqe->wr.send_flags & IB_SEND_SOLICITED);
+ swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
drop:
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
@@ -279,7 +279,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
next_cur = 0;
/* Construct the header. */
- ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
dev->n_multicast_xmit++;
@@ -321,7 +321,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
- qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
@@ -339,9 +339,9 @@ int ipath_make_ud_req(struct ipath_qp *qp)
lrh0 = IPATH_LRH_BTH;
ohdr = &qp->s_hdr.u.oth;
}
- if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_hdrwords++;
- ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
+ ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
} else
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
@@ -359,7 +359,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
qp->s_hdr.lrh[3] = cpu_to_be16(lid);
} else
qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
- if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
bth0 |= extra_bytes << 20;
bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
@@ -371,14 +371,14 @@ int ipath_make_ud_req(struct ipath_qp *qp)
ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
ah_attr->dlid != IPATH_PERMISSIVE_LID ?
cpu_to_be32(IPATH_MULTICAST_QPN) :
- cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+ cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
- qp->qkey : wqe->wr.wr.ud.remote_qkey);
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+ qp->qkey : wqe->ud_wr.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
index a2fb41bba117..1778dee13f99 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.c
+++ b/drivers/staging/rdma/ipath/ipath_verbs.c
@@ -374,7 +374,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
wr->opcode != IB_WR_SEND_WITH_IMM)
goto bail_inval;
/* Check UD destination address PD */
- if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
goto bail_inval;
} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
goto bail_inval;
@@ -395,7 +395,20 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
}
wqe = get_swqe_ptr(qp, qp->s_head);
- wqe->wr = *wr;
+
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+ else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE ||
+ wr->opcode == IB_WR_RDMA_READ)
+ memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+ else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+ else
+ memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
wqe->length = 0;
if (wr->num_sge) {
acc = wr->opcode >= IB_WR_RDMA_READ ?
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
index ec167e545e15..0a90a56870ab 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.h
+++ b/drivers/staging/rdma/ipath/ipath_verbs.h
@@ -277,7 +277,13 @@ struct ipath_mr {
* in qp->s_max_sge.
*/
struct ipath_swqe {
- struct ib_send_wr wr; /* don't use wr.sg_list */
+ union {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ struct ib_ud_wr ud_wr;
+ struct ib_rdma_wr rdma_wr;
+ struct ib_atomic_wr atomic_wr;
+ };
+
u32 psn; /* first packet sequence number */
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 5a8677bafe04..7501626ab529 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -214,6 +214,8 @@ enum {
MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28,
MLX4_DEV_CAP_FLAG2_PHV_EN = 1LL << 29,
MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30,
+ MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
+ MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
};
enum {
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index de45a51b3f04..fe052e234906 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -135,7 +135,10 @@ struct mlx4_rss_context {
struct mlx4_qp_path {
u8 fl;
- u8 vlan_control;
+ union {
+ u8 vlan_control;
+ u8 control;
+ };
u8 disable_pkey_check;
u8 pkey_index;
u8 counter_index;
@@ -156,9 +159,16 @@ struct mlx4_qp_path {
};
enum { /* fl */
- MLX4_FL_CV = 1 << 6,
- MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2
+ MLX4_FL_CV = 1 << 6,
+ MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2,
+ MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1,
+ MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0,
};
+
+enum { /* control */
+ MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER = 1 << 7,
+};
+
enum { /* vlan_control */
MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6,
MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED = 1 << 5, /* 802.1p priority tag */
@@ -254,6 +264,8 @@ enum {
MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32,
MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32,
MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32,
+ MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32,
};
enum { /* param3 */
@@ -436,11 +448,13 @@ enum mlx4_update_qp_attr {
MLX4_UPDATE_QP_VSD = 1 << 1,
MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2,
MLX4_UPDATE_QP_QOS_VPORT = 1 << 3,
- MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 4) - 1
+ MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB = 1 << 4,
+ MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 5) - 1
};
enum mlx4_update_qp_params_flags {
- MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 0,
+ MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB = 1 << 0,
+ MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 1,
};
struct mlx4_update_qp_params {
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 7ccc961f33e9..1e4438ea2380 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -105,11 +105,9 @@ struct svc_rdma_chunk_sge {
};
struct svc_rdma_fastreg_mr {
struct ib_mr *mr;
- void *kva;
- struct ib_fast_reg_page_list *page_list;
- int page_list_len;
+ struct scatterlist *sg;
+ int sg_nents;
unsigned long access_flags;
- unsigned long map_len;
enum dma_data_direction direction;
struct list_head frmr_list;
};
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index fde33ac6b58a..11528591d0d7 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -47,6 +47,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#include <net/ipv6.h>
+#include <net/net_namespace.h>
struct rdma_addr_client {
atomic_t refcount;
@@ -64,6 +65,16 @@ void rdma_addr_register_client(struct rdma_addr_client *client);
*/
void rdma_addr_unregister_client(struct rdma_addr_client *client);
+/**
+ * struct rdma_dev_addr - Contains resolved RDMA hardware addresses
+ * @src_dev_addr: Source MAC address.
+ * @dst_dev_addr: Destination MAC address.
+ * @broadcast: Broadcast address of the device.
+ * @dev_type: The interface hardware type of the device.
+ * @bound_dev_if: An optional device interface index.
+ * @transport: The transport type used.
+ * @net: Network namespace containing the bound_dev_if net_dev.
+ */
struct rdma_dev_addr {
unsigned char src_dev_addr[MAX_ADDR_LEN];
unsigned char dst_dev_addr[MAX_ADDR_LEN];
@@ -71,11 +82,14 @@ struct rdma_dev_addr {
unsigned short dev_type;
int bound_dev_if;
enum rdma_transport_type transport;
+ struct net *net;
};
/**
* rdma_translate_ip - Translate a local IP address to an RDMA hardware
* address.
+ *
+ * The dev_addr->net field must be initialized.
*/
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
u16 *vlan_id);
@@ -90,7 +104,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
* @dst_addr: The destination address to resolve.
* @addr: A reference to a data location that will receive the resolved
* addresses. The data location must remain valid until the callback has
- * been invoked.
+ * been invoked. The net field of the addr struct must be valid.
* @timeout_ms: Amount of time to wait for the address resolution to complete.
* @callback: Call invoked once address resolution has completed, timed out,
* or been canceled. A status of 0 indicates success.
@@ -112,7 +126,7 @@ int rdma_addr_size(struct sockaddr *addr);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
- u8 *smac, u16 *vlan_id);
+ u8 *smac, u16 *vlan_id, int if_index);
static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
{
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index bd92130f4ac5..269a27cf0a46 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -43,6 +43,8 @@
* @port_num: The port number of the device to query.
* @index: The index into the cached GID table to query.
* @gid: The GID value found at the specified index.
+ * @attr: The GID attribute found at the specified index (only in RoCE).
+ * NULL means ignore (output parameter).
*
* ib_get_cached_gid() fetches the specified GID table entry stored in
* the local software cache.
@@ -50,13 +52,15 @@
int ib_get_cached_gid(struct ib_device *device,
u8 port_num,
int index,
- union ib_gid *gid);
+ union ib_gid *gid,
+ struct ib_gid_attr *attr);
/**
* ib_find_cached_gid - Returns the port number and GID table index where
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the cached GID table where the GID was found. This
* parameter may be NULL.
@@ -64,12 +68,40 @@ int ib_get_cached_gid(struct ib_device *device,
* ib_find_cached_gid() searches for the specified GID value in
* the local software cache.
*/
-int ib_find_cached_gid(struct ib_device *device,
+int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
- u8 *port_num,
- u16 *index);
+ struct net_device *ndev,
+ u8 *port_num,
+ u16 *index);
/**
+ * ib_find_cached_gid_by_port - Returns the GID table index where a specified
+ * GID value occurs
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value sould be
+ * searched.
+ * @ndev: In RoCE, the net device of the device. Null means ignore.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ *
+ * ib_find_cached_gid() searches for the specified GID value in
+ * the local software cache.
+ */
+int ib_find_cached_gid_by_port(struct ib_device *device,
+ const union ib_gid *gid,
+ u8 port_num,
+ struct net_device *ndev,
+ u16 *index);
+
+int ib_find_gid_by_filter(struct ib_device *device,
+ const union ib_gid *gid,
+ u8 port_num,
+ bool (*filter)(const union ib_gid *gid,
+ const struct ib_gid_attr *,
+ void *),
+ void *context, u16 *index);
+/**
* ib_get_cached_pkey - Returns a cached PKey table entry
* @device: The device to query.
* @port_num: The port number of the device to query.
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index 709a5331e6b9..e99d8f9a4551 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -76,7 +76,7 @@ enum {
IB_OPCODE_UC = 0x20,
IB_OPCODE_RD = 0x40,
IB_OPCODE_UD = 0x60,
- /* per IBTA 3.1 Table 38, A10.3.2 */
+ /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
IB_OPCODE_CNP = 0x80,
/* operations -- just used to define real constants */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 7e071a6abb34..301969552d0a 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -39,6 +39,7 @@
#include <linux/compiler.h>
#include <linux/atomic.h>
+#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>
@@ -154,11 +155,18 @@ struct ib_sa_path_rec {
u8 packet_life_time_selector;
u8 packet_life_time;
u8 preference;
- u8 smac[ETH_ALEN];
u8 dmac[ETH_ALEN];
- u16 vlan_id;
+ /* ignored in IB */
+ int ifindex;
+ /* ignored in IB */
+ struct net *net;
};
+static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
+{
+ return rec->net ? dev_get_by_index(rec->net, rec->ifindex) : NULL;
+}
+
#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1)
#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7845fae6f2df..9a68a19532ba 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -137,6 +137,8 @@ enum ib_device_cap_flags {
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
+ IB_DEVICE_RC_IP_CSUM = (1<<25),
+ IB_DEVICE_RAW_IP_CSUM = (1<<26),
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
IB_DEVICE_ON_DEMAND_PAGING = (1<<31),
@@ -474,7 +476,7 @@ enum ib_event_type {
IB_EVENT_GID_CHANGE,
};
-__attribute_const__ const char *ib_event_msg(enum ib_event_type event);
+const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
struct ib_event {
struct ib_device *device;
@@ -697,7 +699,6 @@ struct ib_ah_attr {
u8 ah_flags;
u8 port_num;
u8 dmac[ETH_ALEN];
- u16 vlan_id;
};
enum ib_wc_status {
@@ -725,7 +726,7 @@ enum ib_wc_status {
IB_WC_GENERAL_ERR
};
-__attribute_const__ const char *ib_wc_status_msg(enum ib_wc_status status);
+const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
enum ib_wc_opcode {
IB_WC_SEND,
@@ -736,7 +737,7 @@ enum ib_wc_opcode {
IB_WC_BIND_MW,
IB_WC_LSO,
IB_WC_LOCAL_INV,
- IB_WC_FAST_REG_MR,
+ IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
/*
@@ -873,7 +874,6 @@ enum ib_qp_create_flags {
IB_QP_CREATE_RESERVED_END = 1 << 31,
};
-
/*
* Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
* callback to destroy the passed in QP.
@@ -957,10 +957,10 @@ enum ib_qp_attr_mask {
IB_QP_PATH_MIG_STATE = (1<<18),
IB_QP_CAP = (1<<19),
IB_QP_DEST_QPN = (1<<20),
- IB_QP_SMAC = (1<<21),
- IB_QP_ALT_SMAC = (1<<22),
- IB_QP_VID = (1<<23),
- IB_QP_ALT_VID = (1<<24),
+ IB_QP_RESERVED1 = (1<<21),
+ IB_QP_RESERVED2 = (1<<22),
+ IB_QP_RESERVED3 = (1<<23),
+ IB_QP_RESERVED4 = (1<<24),
};
enum ib_qp_state {
@@ -1010,10 +1010,6 @@ struct ib_qp_attr {
u8 rnr_retry;
u8 alt_port_num;
u8 alt_timeout;
- u8 smac[ETH_ALEN];
- u8 alt_smac[ETH_ALEN];
- u16 vlan_id;
- u16 alt_vlan_id;
};
enum ib_wr_opcode {
@@ -1028,7 +1024,7 @@ enum ib_wr_opcode {
IB_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV,
IB_WR_LOCAL_INV,
- IB_WR_FAST_REG_MR,
+ IB_WR_REG_MR,
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
IB_WR_BIND_MW,
@@ -1066,12 +1062,6 @@ struct ib_sge {
u32 lkey;
};
-struct ib_fast_reg_page_list {
- struct ib_device *device;
- u64 *page_list;
- unsigned int max_page_list_len;
-};
-
/**
* struct ib_mw_bind_info - Parameters for a memory window bind operation.
* @mr: A memory region to bind the memory window to.
@@ -1100,54 +1090,89 @@ struct ib_send_wr {
__be32 imm_data;
u32 invalidate_rkey;
} ex;
- union {
- struct {
- u64 remote_addr;
- u32 rkey;
- } rdma;
- struct {
- u64 remote_addr;
- u64 compare_add;
- u64 swap;
- u64 compare_add_mask;
- u64 swap_mask;
- u32 rkey;
- } atomic;
- struct {
- struct ib_ah *ah;
- void *header;
- int hlen;
- int mss;
- u32 remote_qpn;
- u32 remote_qkey;
- u16 pkey_index; /* valid for GSI only */
- u8 port_num; /* valid for DR SMPs on switch only */
- } ud;
- struct {
- u64 iova_start;
- struct ib_fast_reg_page_list *page_list;
- unsigned int page_shift;
- unsigned int page_list_len;
- u32 length;
- int access_flags;
- u32 rkey;
- } fast_reg;
- struct {
- struct ib_mw *mw;
- /* The new rkey for the memory window. */
- u32 rkey;
- struct ib_mw_bind_info bind_info;
- } bind_mw;
- struct {
- struct ib_sig_attrs *sig_attrs;
- struct ib_mr *sig_mr;
- int access_flags;
- struct ib_sge *prot;
- } sig_handover;
- } wr;
- u32 xrc_remote_srq_num; /* XRC TGT QPs only */
};
+struct ib_rdma_wr {
+ struct ib_send_wr wr;
+ u64 remote_addr;
+ u32 rkey;
+};
+
+static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_rdma_wr, wr);
+}
+
+struct ib_atomic_wr {
+ struct ib_send_wr wr;
+ u64 remote_addr;
+ u64 compare_add;
+ u64 swap;
+ u64 compare_add_mask;
+ u64 swap_mask;
+ u32 rkey;
+};
+
+static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_atomic_wr, wr);
+}
+
+struct ib_ud_wr {
+ struct ib_send_wr wr;
+ struct ib_ah *ah;
+ void *header;
+ int hlen;
+ int mss;
+ u32 remote_qpn;
+ u32 remote_qkey;
+ u16 pkey_index; /* valid for GSI only */
+ u8 port_num; /* valid for DR SMPs on switch only */
+};
+
+static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_ud_wr, wr);
+}
+
+struct ib_reg_wr {
+ struct ib_send_wr wr;
+ struct ib_mr *mr;
+ u32 key;
+ int access;
+};
+
+static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_reg_wr, wr);
+}
+
+struct ib_bind_mw_wr {
+ struct ib_send_wr wr;
+ struct ib_mw *mw;
+ /* The new rkey for the memory window. */
+ u32 rkey;
+ struct ib_mw_bind_info bind_info;
+};
+
+static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_bind_mw_wr, wr);
+}
+
+struct ib_sig_handover_wr {
+ struct ib_send_wr wr;
+ struct ib_sig_attrs *sig_attrs;
+ struct ib_mr *sig_mr;
+ int access_flags;
+ struct ib_sge *prot;
+};
+
+static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct ib_sig_handover_wr, wr);
+}
+
struct ib_recv_wr {
struct ib_recv_wr *next;
u64 wr_id;
@@ -1334,6 +1359,9 @@ struct ib_mr {
struct ib_uobject *uobject;
u32 lkey;
u32 rkey;
+ u64 iova;
+ u32 length;
+ unsigned int page_size;
atomic_t usecnt; /* count number of MWs */
};
@@ -1718,9 +1746,9 @@ struct ib_device {
struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
- struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
- int page_list_len);
- void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
+ int (*map_mr_sg)(struct ib_mr *mr,
+ struct scatterlist *sg,
+ int sg_nents);
int (*rereg_phys_mr)(struct ib_mr *mr,
int mr_rereg_mask,
struct ib_pd *pd,
@@ -2176,7 +2204,8 @@ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
}
int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid);
+ u8 port_num, int index, union ib_gid *gid,
+ struct ib_gid_attr *attr);
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
@@ -2190,7 +2219,7 @@ int ib_modify_port(struct ib_device *device,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index);
+ struct net_device *ndev, u8 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
@@ -2829,33 +2858,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
u32 max_num_sg);
/**
- * ib_alloc_fast_reg_page_list - Allocates a page list array
- * @device - ib device pointer.
- * @page_list_len - size of the page list array to be allocated.
- *
- * This allocates and returns a struct ib_fast_reg_page_list * and a
- * page_list array that is at least page_list_len in size. The actual
- * size is returned in max_page_list_len. The caller is responsible
- * for initializing the contents of the page_list array before posting
- * a send work request with the IB_WC_FAST_REG_MR opcode.
- *
- * The page_list array entries must be translated using one of the
- * ib_dma_*() functions just like the addresses passed to
- * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct
- * ib_fast_reg_page_list must not be modified by the caller until the
- * IB_WC_FAST_REG_MR work request completes.
- */
-struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
- struct ib_device *device, int page_list_len);
-
-/**
- * ib_free_fast_reg_page_list - Deallocates a previously allocated
- * page list array.
- * @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
- */
-void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-
-/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
* R_Key and L_Key.
* @mr - struct ib_mr pointer to be updated.
@@ -3023,4 +3025,28 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);
+int ib_map_mr_sg(struct ib_mr *mr,
+ struct scatterlist *sg,
+ int sg_nents,
+ unsigned int page_size);
+
+static inline int
+ib_map_mr_sg_zbva(struct ib_mr *mr,
+ struct scatterlist *sg,
+ int sg_nents,
+ unsigned int page_size)
+{
+ int n;
+
+ n = ib_map_mr_sg(mr, sg, sg_nents, page_size);
+ mr->iova = 0;
+
+ return n;
+}
+
+int ib_sg_to_pages(struct ib_mr *mr,
+ struct scatterlist *sgl,
+ int sg_nents,
+ int (*set_page)(struct ib_mr *, u64));
+
#endif /* IB_VERBS_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c92522c192d2..afe44fde72a5 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -62,7 +62,7 @@ enum rdma_cm_event_type {
RDMA_CM_EVENT_TIMEWAIT_EXIT
};
-__attribute_const__ const char *rdma_event_msg(enum rdma_cm_event_type event);
+const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event);
enum rdma_port_space {
RDMA_PS_SDP = 0x0001,
@@ -160,13 +160,17 @@ struct rdma_cm_id {
/**
* rdma_create_id - Create an RDMA identifier.
*
+ * @net: The network namespace in which to create the new id.
* @event_handler: User callback invoked to report events associated with the
* returned rdma_id.
* @context: User specified context associated with the id.
* @ps: RDMA port space.
* @qp_type: type of queue pair associated with the id.
+ *
+ * The id holds a reference on the network namespace until it is destroyed.
*/
-struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+struct rdma_cm_id *rdma_create_id(struct net *net,
+ rdma_cm_event_handler event_handler,
void *context, enum rdma_port_space ps,
enum ib_qp_type qp_type);
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 978841eeaff1..8126c143a519 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -92,6 +92,7 @@ enum {
enum {
IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ,
+ IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
};
@@ -516,6 +517,25 @@ struct ib_uverbs_create_qp {
__u64 driver_data[0];
};
+struct ib_uverbs_ex_create_qp {
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 send_cq_handle;
+ __u32 recv_cq_handle;
+ __u32 srq_handle;
+ __u32 max_send_wr;
+ __u32 max_recv_wr;
+ __u32 max_send_sge;
+ __u32 max_recv_sge;
+ __u32 max_inline_data;
+ __u8 sq_sig_all;
+ __u8 qp_type;
+ __u8 is_srq;
+ __u8 reserved;
+ __u32 comp_mask;
+ __u32 create_flags;
+};
+
struct ib_uverbs_open_qp {
__u64 response;
__u64 user_handle;
@@ -538,6 +558,12 @@ struct ib_uverbs_create_qp_resp {
__u32 reserved;
};
+struct ib_uverbs_ex_create_qp_resp {
+ struct ib_uverbs_create_qp_resp base;
+ __u32 comp_mask;
+ __u32 response_length;
+};
+
/*
* This struct needs to remain a multiple of 8 bytes to keep the
* alignment of the modify QP parameters.
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index ba1210253f5e..52b4a2f993f2 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
return -ENOMEM;
/* Create the RDMA CM ID */
- rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
- IB_QPT_RC);
+ rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(rdma->cm_id))
goto error;
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a833ab7898fe..f222885ac0c7 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -336,7 +336,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
- cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+ cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
diff --git a/net/rds/ib.h b/net/rds/ib.h
index f17d09567890..b3fdebb57460 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -75,7 +75,11 @@ struct rds_ib_connect_private {
struct rds_ib_send_work {
void *s_op;
- struct ib_send_wr s_wr;
+ union {
+ struct ib_send_wr s_wr;
+ struct ib_rdma_wr s_rdma_wr;
+ struct ib_atomic_wr s_atomic_wr;
+ };
struct ib_sge s_sge[RDS_IB_MAX_SGE];
unsigned long s_queued;
};
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 2b2370e7f356..da5a7fb98c77 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -668,7 +668,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
- ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
+ ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 670882c752e9..eac30bf486d7 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -777,23 +777,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
send->s_queued = jiffies;
if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
- send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
- send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
- send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
- send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
- send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
+ send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
+ send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
+ send->s_atomic_wr.swap = op->op_m_cswp.swap;
+ send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
+ send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
} else { /* FADD */
- send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
- send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
- send->s_wr.wr.atomic.swap = 0;
- send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
- send->s_wr.wr.atomic.swap_mask = 0;
+ send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
+ send->s_atomic_wr.compare_add = op->op_m_fadd.add;
+ send->s_atomic_wr.swap = 0;
+ send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
+ send->s_atomic_wr.swap_mask = 0;
}
nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
- send->s_wr.num_sge = 1;
- send->s_wr.next = NULL;
- send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
- send->s_wr.wr.atomic.rkey = op->op_rkey;
+ send->s_atomic_wr.wr.num_sge = 1;
+ send->s_atomic_wr.wr.next = NULL;
+ send->s_atomic_wr.remote_addr = op->op_remote_addr;
+ send->s_atomic_wr.rkey = op->op_rkey;
send->s_op = op;
rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
@@ -818,11 +818,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
if (nr_sig)
atomic_add(nr_sig, &ic->i_signaled_sends);
- failed_wr = &send->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
+ failed_wr = &send->s_atomic_wr.wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
- send, &send->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &send->s_wr);
+ send, &send->s_atomic_wr, ret, failed_wr);
+ BUG_ON(failed_wr != &send->s_atomic_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@@ -831,9 +831,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
goto out;
}
- if (unlikely(failed_wr != &send->s_wr)) {
+ if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
- BUG_ON(failed_wr != &send->s_wr);
+ BUG_ON(failed_wr != &send->s_atomic_wr.wr);
}
out:
@@ -904,22 +904,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
- send->s_wr.wr.rdma.remote_addr = remote_addr;
- send->s_wr.wr.rdma.rkey = op->op_rkey;
+ send->s_rdma_wr.remote_addr = remote_addr;
+ send->s_rdma_wr.rkey = op->op_rkey;
if (num_sge > max_sge) {
- send->s_wr.num_sge = max_sge;
+ send->s_rdma_wr.wr.num_sge = max_sge;
num_sge -= max_sge;
} else {
- send->s_wr.num_sge = num_sge;
+ send->s_rdma_wr.wr.num_sge = num_sge;
}
- send->s_wr.next = NULL;
+ send->s_rdma_wr.wr.next = NULL;
if (prev)
- prev->s_wr.next = &send->s_wr;
+ prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;
- for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
+ for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
+ scat != &op->op_sg[op->op_count]; j++) {
len = ib_sg_dma_len(ic->i_cm_id->device, scat);
send->s_sge[j].addr =
ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -934,7 +935,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
}
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
- &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+ &send->s_rdma_wr.wr,
+ send->s_rdma_wr.wr.num_sge,
+ send->s_rdma_wr.wr.next);
prev = send;
if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -955,11 +958,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
if (nr_sig)
atomic_add(nr_sig, &ic->i_signaled_sends);
- failed_wr = &first->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+ failed_wr = &first->s_rdma_wr.wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
- first, &first->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &first->s_wr);
+ first, &first->s_rdma_wr.wr, ret, failed_wr);
+ BUG_ON(failed_wr != &first->s_rdma_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@@ -968,9 +971,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
goto out;
}
- if (unlikely(failed_wr != &first->s_wr)) {
+ if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
- BUG_ON(failed_wr != &first->s_wr);
+ BUG_ON(failed_wr != &first->s_rdma_wr.wr);
}
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 3df0295c6659..576f1825fc55 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
- cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+ cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
diff --git a/net/rds/iw.h b/net/rds/iw.h
index cbe6674e31ee..5af01d1758b3 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -74,10 +74,13 @@ struct rds_iw_send_work {
struct rm_rdma_op *s_op;
struct rds_iw_mapping *s_mapping;
struct ib_mr *s_mr;
- struct ib_fast_reg_page_list *s_page_list;
unsigned char s_remap_count;
- struct ib_send_wr s_wr;
+ union {
+ struct ib_send_wr s_send_wr;
+ struct ib_rdma_wr s_rdma_wr;
+ struct ib_reg_wr s_reg_wr;
+ };
struct ib_sge s_sge[RDS_IW_MAX_SGE];
unsigned long s_queued;
};
@@ -195,7 +198,7 @@ struct rds_iw_device {
/* Magic WR_ID for ACKs */
#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
-#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL)
+#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL)
#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
struct rds_iw_statistics {
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a6553a6fb2bc..aea4c911bc76 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn)
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
- ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
+ ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index d3d4454ffc84..b09a40c1adce 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -47,7 +47,6 @@ struct rds_iw_mr {
struct rdma_cm_id *cm_id;
struct ib_mr *mr;
- struct ib_fast_reg_page_list *page_list;
struct rds_iw_mapping mapping;
unsigned char remap_count;
@@ -77,8 +76,8 @@ struct rds_iw_mr_pool {
static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
-static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
-static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
+static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
+static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr,
struct scatterlist *sg, unsigned int nents);
static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
@@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
sg->bytes = 0;
}
-static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
- struct rds_iw_scatterlist *sg)
+static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
+ struct rds_iw_scatterlist *sg)
{
struct ib_device *dev = rds_iwdev->dev;
- u64 *dma_pages = NULL;
- int i, j, ret;
+ int i, ret;
WARN_ON(sg->dma_len);
sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
if (unlikely(!sg->dma_len)) {
printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
- return ERR_PTR(-EBUSY);
+ return -EBUSY;
}
sg->bytes = 0;
@@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
if (sg->dma_npages > fastreg_message_size)
goto out_unmap;
- dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC);
- if (!dma_pages) {
- ret = -ENOMEM;
- goto out_unmap;
- }
- for (i = j = 0; i < sg->dma_len; ++i) {
- unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
- u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
- u64 end_addr;
- end_addr = dma_addr + dma_len;
- dma_addr &= ~PAGE_MASK;
- for (; dma_addr < end_addr; dma_addr += PAGE_SIZE)
- dma_pages[j++] = dma_addr;
- BUG_ON(j > sg->dma_npages);
- }
-
- return dma_pages;
+ return 0;
out_unmap:
ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
sg->dma_len = 0;
- kfree(dma_pages);
- return ERR_PTR(ret);
+ return ret;
}
@@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
INIT_LIST_HEAD(&ibmr->mapping.m_list);
ibmr->mapping.m_mr = ibmr;
- err = rds_iw_init_fastreg(pool, ibmr);
+ err = rds_iw_init_reg(pool, ibmr);
if (err)
goto out_no_cigar;
@@ -620,7 +601,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
ibmr->cm_id = cm_id;
ibmr->device = rds_iwdev;
- ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents);
+ ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents);
if (ret == 0)
*key_ret = ibmr->mr->rkey;
else
@@ -636,7 +617,7 @@ out:
}
/*
- * iWARP fastreg handling
+ * iWARP reg handling
*
* The life cycle of a fastreg registration is a bit different from
* FMRs.
@@ -648,7 +629,7 @@ out:
* This creates a bit of a problem for us, as we do not have the destination
* IP in GET_MR, so the connection must be setup prior to the GET_MR call for
* RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit
- * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request
+ * will try to queue a LOCAL_INV (if needed) and a REG_MR work request
* before queuing the SEND. When completions for these arrive, they are
* dispatched to the MR has a bit set showing that RDMa can be performed.
*
@@ -657,11 +638,10 @@ out:
* The expectation there is that this invalidation step includes ALL
* PREVIOUSLY FREED MRs.
*/
-static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
- struct rds_iw_mr *ibmr)
+static int rds_iw_init_reg(struct rds_iw_mr_pool *pool,
+ struct rds_iw_mr *ibmr)
{
struct rds_iw_device *rds_iwdev = pool->device;
- struct ib_fast_reg_page_list *page_list = NULL;
struct ib_mr *mr;
int err;
@@ -674,55 +654,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
return err;
}
- /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages
- * is not filled in.
- */
- page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size);
- if (IS_ERR(page_list)) {
- err = PTR_ERR(page_list);
-
- printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err);
- ib_dereg_mr(mr);
- return err;
- }
-
- ibmr->page_list = page_list;
ibmr->mr = mr;
return 0;
}
-static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping)
+static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping)
{
struct rds_iw_mr *ibmr = mapping->m_mr;
- struct ib_send_wr f_wr, *failed_wr;
- int ret;
+ struct rds_iw_scatterlist *m_sg = &mapping->m_sg;
+ struct ib_reg_wr reg_wr;
+ struct ib_send_wr *failed_wr;
+ int ret, n;
+
+ n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE);
+ if (unlikely(n != m_sg->len))
+ return n < 0 ? n : -EINVAL;
+
+ reg_wr.wr.next = NULL;
+ reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg_wr.wr.wr_id = RDS_IW_REG_WR_ID;
+ reg_wr.wr.num_sge = 0;
+ reg_wr.mr = ibmr->mr;
+ reg_wr.key = mapping->m_rkey;
+ reg_wr.access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
/*
- * Perform a WR for the fast_reg_mr. Each individual page
+ * Perform a WR for the reg_mr. Each individual page
* in the sg list is added to the fast reg page list and placed
- * inside the fast_reg_mr WR. The key used is a rolling 8bit
+ * inside the reg_mr WR. The key used is a rolling 8bit
* counter, which should guarantee uniqueness.
*/
ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
mapping->m_rkey = ibmr->mr->rkey;
- memset(&f_wr, 0, sizeof(f_wr));
- f_wr.wr_id = RDS_IW_FAST_REG_WR_ID;
- f_wr.opcode = IB_WR_FAST_REG_MR;
- f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
- f_wr.wr.fast_reg.rkey = mapping->m_rkey;
- f_wr.wr.fast_reg.page_list = ibmr->page_list;
- f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len;
- f_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
- f_wr.wr.fast_reg.iova_start = 0;
- f_wr.send_flags = IB_SEND_SIGNALED;
-
- failed_wr = &f_wr;
- ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr);
- BUG_ON(failed_wr != &f_wr);
+ failed_wr = &reg_wr.wr;
+ ret = ib_post_send(ibmr->cm_id->qp, &reg_wr.wr, &failed_wr);
+ BUG_ON(failed_wr != &reg_wr.wr);
if (ret)
printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
__func__, __LINE__, ret);
@@ -754,21 +723,20 @@ out:
return ret;
}
-static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
- struct rds_iw_mr *ibmr,
- struct scatterlist *sg,
- unsigned int sg_len)
+static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
+ struct rds_iw_mr *ibmr,
+ struct scatterlist *sg,
+ unsigned int sg_len)
{
struct rds_iw_device *rds_iwdev = pool->device;
struct rds_iw_mapping *mapping = &ibmr->mapping;
u64 *dma_pages;
- int i, ret = 0;
+ int ret = 0;
rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
- dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
- if (IS_ERR(dma_pages)) {
- ret = PTR_ERR(dma_pages);
+ ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
+ if (ret) {
dma_pages = NULL;
goto out;
}
@@ -778,10 +746,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
goto out;
}
- for (i = 0; i < mapping->m_sg.dma_npages; ++i)
- ibmr->page_list->page_list[i] = dma_pages[i];
-
- ret = rds_iw_rdma_build_fastreg(mapping);
+ ret = rds_iw_rdma_reg_mr(mapping);
if (ret)
goto out;
@@ -867,8 +832,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr)
{
- if (ibmr->page_list)
- ib_free_fast_reg_page_list(ibmr->page_list);
if (ibmr->mr)
ib_dereg_mr(ibmr->mr);
}
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 86152ec3b887..e20bd503f4bd 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
send->s_op = NULL;
send->s_mapping = NULL;
- send->s_wr.next = NULL;
- send->s_wr.wr_id = i;
- send->s_wr.sg_list = send->s_sge;
- send->s_wr.num_sge = 1;
- send->s_wr.opcode = IB_WR_SEND;
- send->s_wr.send_flags = 0;
- send->s_wr.ex.imm_data = 0;
+ send->s_send_wr.next = NULL;
+ send->s_send_wr.wr_id = i;
+ send->s_send_wr.sg_list = send->s_sge;
+ send->s_send_wr.num_sge = 1;
+ send->s_send_wr.opcode = IB_WR_SEND;
+ send->s_send_wr.send_flags = 0;
+ send->s_send_wr.ex.imm_data = 0;
sge = rds_iw_data_sge(ic, send->s_sge);
sge->lkey = 0;
@@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
break;
}
-
- send->s_page_list = ib_alloc_fast_reg_page_list(
- ic->i_cm_id->device, fastreg_message_size);
- if (IS_ERR(send->s_page_list)) {
- printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
- break;
- }
}
}
@@ -177,9 +170,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
BUG_ON(!send->s_mr);
ib_dereg_mr(send->s_mr);
- BUG_ON(!send->s_page_list);
- ib_free_fast_reg_page_list(send->s_page_list);
- if (send->s_wr.opcode == 0xdead)
+ if (send->s_send_wr.opcode == 0xdead)
continue;
if (send->s_rm)
rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
@@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
continue;
}
- if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
+ if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) {
ic->i_fastreg_posted = 1;
continue;
}
@@ -247,12 +238,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
send = &ic->i_sends[oldest];
/* In the error case, wc.opcode sometimes contains garbage */
- switch (send->s_wr.opcode) {
+ switch (send->s_send_wr.opcode) {
case IB_WR_SEND:
if (send->s_rm)
rds_iw_send_unmap_rm(ic, send, wc.status);
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
@@ -262,12 +253,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
default:
printk_ratelimited(KERN_NOTICE
"RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
- __func__, send->s_wr.opcode);
+ __func__, send->s_send_wr.opcode);
break;
}
- send->s_wr.opcode = 0xdead;
- send->s_wr.num_sge = 1;
+ send->s_send_wr.opcode = 0xdead;
+ send->s_send_wr.num_sge = 1;
if (time_after(jiffies, send->s_queued + HZ/2))
rds_iw_stats_inc(s_iw_tx_stalled);
@@ -455,10 +446,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
WARN_ON(pos != send - ic->i_sends);
- send->s_wr.send_flags = send_flags;
- send->s_wr.opcode = IB_WR_SEND;
- send->s_wr.num_sge = 2;
- send->s_wr.next = NULL;
+ send->s_send_wr.send_flags = send_flags;
+ send->s_send_wr.opcode = IB_WR_SEND;
+ send->s_send_wr.num_sge = 2;
+ send->s_send_wr.next = NULL;
send->s_queued = jiffies;
send->s_op = NULL;
@@ -472,7 +463,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
} else {
/* We're sending a packet with no payload. There is only
* one SGE */
- send->s_wr.num_sge = 1;
+ send->s_send_wr.num_sge = 1;
sge = &send->s_sge[0];
}
@@ -672,23 +663,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
*/
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
ic->i_unsignaled_bytes -= len;
if (ic->i_unsignaled_bytes <= 0) {
ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
/*
* Always signal the last one if we're stopping due to flow control.
*/
if (flow_controlled && i == (work_alloc-1))
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
- &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+ &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next);
sent += len;
rm->data.op_dmaoff += len;
@@ -722,7 +713,7 @@ add_header:
}
if (prev)
- prev->s_wr.next = &send->s_wr;
+ prev->s_send_wr.next = &send->s_send_wr;
prev = send;
pos = (pos + 1) % ic->i_send_ring.w_nr;
@@ -736,7 +727,7 @@ add_header:
/* if we finished the message then send completion owns it */
if (scat == &rm->data.op_sg[rm->data.op_count]) {
prev->s_rm = ic->i_rm;
- prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ic->i_rm = NULL;
}
@@ -748,11 +739,11 @@ add_header:
rds_iw_send_add_credits(conn, credit_alloc - i);
/* XXX need to worry about failed_wr and partial sends. */
- failed_wr = &first->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+ failed_wr = &first->s_send_wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
- first, &first->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &first->s_wr);
+ first, &first->s_send_wr, ret, failed_wr);
+ BUG_ON(failed_wr != &first->s_send_wr);
if (ret) {
printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@@ -770,24 +761,26 @@ out:
return ret;
}
-static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr)
+static int rds_iw_build_send_reg(struct rds_iw_send_work *send,
+ struct scatterlist *sg,
+ int sg_nents)
{
- BUG_ON(nent > send->s_page_list->max_page_list_len);
- /*
- * Perform a WR for the fast_reg_mr. Each individual page
- * in the sg list is added to the fast reg page list and placed
- * inside the fast_reg_mr WR.
- */
- send->s_wr.opcode = IB_WR_FAST_REG_MR;
- send->s_wr.wr.fast_reg.length = len;
- send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey;
- send->s_wr.wr.fast_reg.page_list = send->s_page_list;
- send->s_wr.wr.fast_reg.page_list_len = nent;
- send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
- send->s_wr.wr.fast_reg.iova_start = sg_addr;
+ int n;
+
+ n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE);
+ if (unlikely(n != sg_nents))
+ return n < 0 ? n : -EINVAL;
+
+ send->s_reg_wr.wr.opcode = IB_WR_REG_MR;
+ send->s_reg_wr.wr.wr_id = 0;
+ send->s_reg_wr.wr.num_sge = 0;
+ send->s_reg_wr.mr = send->s_mr;
+ send->s_reg_wr.key = send->s_mr->rkey;
+ send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE;
ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
+
+ return 0;
}
int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
@@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
int sent;
int ret;
int num_sge;
+ int sg_nents;
rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
@@ -861,9 +855,10 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
scat = &op->op_sg[0];
sent = 0;
num_sge = op->op_count;
+ sg_nents = 0;
for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
- send->s_wr.send_flags = 0;
+ send->s_rdma_wr.wr.send_flags = 0;
send->s_queued = jiffies;
/*
@@ -872,7 +867,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
*/
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
- send->s_wr.send_flags = IB_SEND_SIGNALED;
+ send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
}
/* To avoid the need to have the plumbing to invalidate the fastreg_mr used
@@ -880,30 +875,31 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
* IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
*/
if (op->op_write)
- send->s_wr.opcode = IB_WR_RDMA_WRITE;
+ send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
else
- send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
- send->s_wr.wr.rdma.remote_addr = remote_addr;
- send->s_wr.wr.rdma.rkey = op->op_rkey;
+ send->s_rdma_wr.remote_addr = remote_addr;
+ send->s_rdma_wr.rkey = op->op_rkey;
send->s_op = op;
if (num_sge > rds_iwdev->max_sge) {
- send->s_wr.num_sge = rds_iwdev->max_sge;
+ send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge;
num_sge -= rds_iwdev->max_sge;
} else
- send->s_wr.num_sge = num_sge;
+ send->s_rdma_wr.wr.num_sge = num_sge;
- send->s_wr.next = NULL;
+ send->s_rdma_wr.wr.next = NULL;
if (prev)
- prev->s_wr.next = &send->s_wr;
+ prev->s_send_wr.next = &send->s_rdma_wr.wr;
- for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
+ for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
+ scat != &op->op_sg[op->op_count]; j++) {
len = ib_sg_dma_len(ic->i_cm_id->device, scat);
- if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
- send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
+ if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV)
+ sg_nents++;
else {
send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
send->s_sge[j].length = len;
@@ -917,15 +913,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
scat++;
}
- if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
- send->s_wr.num_sge = 1;
+ if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
+ send->s_rdma_wr.wr.num_sge = 1;
send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
}
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
- &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+ &send->s_rdma_wr,
+ send->s_rdma_wr.wr.num_sge,
+ send->s_rdma_wr.wr.next);
prev = send;
if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -934,7 +932,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
/* if we finished the message then send completion owns it */
if (scat == &op->op_sg[op->op_count])
- first->s_wr.send_flags = IB_SEND_SIGNALED;
+ first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
if (i < work_alloc) {
rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
@@ -948,16 +946,20 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
* fastreg_mr (or possibly a dma_mr)
*/
if (!op->op_write) {
- rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
- op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
+ ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos],
+ &op->op_sg[0], sg_nents);
+ if (ret) {
+ printk(KERN_WARNING "RDS/IW: failed to reg send mem\n");
+ goto out;
+ }
work_alloc++;
}
- failed_wr = &first->s_wr;
- ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+ failed_wr = &first->s_rdma_wr.wr;
+ ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
- first, &first->s_wr, ret, failed_wr);
- BUG_ON(failed_wr != &first->s_wr);
+ first, &first->s_rdma_wr, ret, failed_wr);
+ BUG_ON(failed_wr != &first->s_rdma_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index b9b40af5345b..9c1fed81bf0f 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void)
struct rdma_cm_id *cm_id;
int ret;
- cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP,
- IB_QPT_RC);
+ cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 5318951b3b53..a1434447b0d6 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
if (IS_ERR(f->fr_mr))
goto out_mr_err;
- f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
- if (IS_ERR(f->fr_pgl))
+
+ f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
+ if (!f->sg)
goto out_list_err;
+
+ sg_init_table(f->sg, depth);
+
return 0;
out_mr_err:
@@ -163,9 +167,9 @@ out_mr_err:
return rc;
out_list_err:
- rc = PTR_ERR(f->fr_pgl);
- dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n",
- __func__, rc);
+ rc = -ENOMEM;
+ dprintk("RPC: %s: sg allocation failure\n",
+ __func__);
ib_dereg_mr(f->fr_mr);
return rc;
}
@@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
if (rc)
dprintk("RPC: %s: ib_dereg_mr status %i\n",
__func__, rc);
- ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+ kfree(r->r.frmr.sg);
}
static int
@@ -312,13 +316,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct rpcrdma_mw *mw;
struct rpcrdma_frmr *frmr;
struct ib_mr *mr;
- struct ib_send_wr fastreg_wr, *bad_wr;
+ struct ib_reg_wr reg_wr;
+ struct ib_send_wr *bad_wr;
+ int rc, i, n, dma_nents;
u8 key;
- int len, pageoff;
- int i, rc;
- int seg_len;
- u64 pa;
- int page_no;
mw = seg1->rl_mw;
seg1->rl_mw = NULL;
@@ -331,64 +332,80 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
} while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->r.frmr;
frmr->fr_state = FRMR_IS_VALID;
+ mr = frmr->fr_mr;
- pageoff = offset_in_page(seg1->mr_offset);
- seg1->mr_offset -= pageoff; /* start of page */
- seg1->mr_len += pageoff;
- len = -pageoff;
if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth;
- for (page_no = i = 0; i < nsegs;) {
- rpcrdma_map_one(device, seg, direction);
- pa = seg->mr_dma;
- for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
- frmr->fr_pgl->page_list[page_no++] = pa;
- pa += PAGE_SIZE;
- }
- len += seg->mr_len;
+ for (i = 0; i < nsegs;) {
+ if (seg->mr_page)
+ sg_set_page(&frmr->sg[i],
+ seg->mr_page,
+ seg->mr_len,
+ offset_in_page(seg->mr_offset));
+ else
+ sg_set_buf(&frmr->sg[i], seg->mr_offset,
+ seg->mr_len);
+
++seg;
++i;
+
/* Check for holes */
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n",
- __func__, mw, i, len);
-
- memset(&fastreg_wr, 0, sizeof(fastreg_wr));
- fastreg_wr.wr_id = (unsigned long)(void *)mw;
- fastreg_wr.opcode = IB_WR_FAST_REG_MR;
- fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
- fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
- fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- fastreg_wr.wr.fast_reg.page_list_len = page_no;
- fastreg_wr.wr.fast_reg.length = len;
- fastreg_wr.wr.fast_reg.access_flags = writing ?
- IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
- IB_ACCESS_REMOTE_READ;
- mr = frmr->fr_mr;
+ frmr->sg_nents = i;
+
+ dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
+ if (!dma_nents) {
+ pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n",
+ __func__, frmr->sg, frmr->sg_nents);
+ return -ENOMEM;
+ }
+
+ n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+ if (unlikely(n != frmr->sg_nents)) {
+ pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
+ __func__, frmr->fr_mr, n, frmr->sg_nents);
+ rc = n < 0 ? n : -EINVAL;
+ goto out_senderr;
+ }
+
+ dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
+ __func__, mw, frmr->sg_nents, mr->length);
+
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
- fastreg_wr.wr.fast_reg.rkey = mr->rkey;
+
+ reg_wr.wr.next = NULL;
+ reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg_wr.wr.wr_id = (uintptr_t)mw;
+ reg_wr.wr.num_sge = 0;
+ reg_wr.wr.send_flags = 0;
+ reg_wr.mr = mr;
+ reg_wr.key = mr->rkey;
+ reg_wr.access = writing ?
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+ IB_ACCESS_REMOTE_READ;
DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
+ rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr);
if (rc)
goto out_senderr;
+ seg1->mr_dir = direction;
seg1->rl_mw = mw;
seg1->mr_rkey = mr->rkey;
- seg1->mr_base = seg1->mr_dma + pageoff;
- seg1->mr_nsegs = i;
- seg1->mr_len = len;
- return i;
+ seg1->mr_base = mr->iova;
+ seg1->mr_nsegs = frmr->sg_nents;
+ seg1->mr_len = mr->length;
+
+ return frmr->sg_nents;
out_senderr:
dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
- while (i--)
- rpcrdma_unmap_one(device, --seg);
+ ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction);
__frwr_queue_recovery(mw);
return rc;
}
@@ -402,22 +419,22 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
struct rpcrdma_mr_seg *seg1 = seg;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw = seg1->rl_mw;
+ struct rpcrdma_frmr *frmr = &mw->r.frmr;
struct ib_send_wr invalidate_wr, *bad_wr;
int rc, nsegs = seg->mr_nsegs;
dprintk("RPC: %s: FRMR %p\n", __func__, mw);
seg1->rl_mw = NULL;
- mw->r.frmr.fr_state = FRMR_IS_INVALID;
+ frmr->fr_state = FRMR_IS_INVALID;
memset(&invalidate_wr, 0, sizeof(invalidate_wr));
invalidate_wr.wr_id = (unsigned long)(void *)mw;
invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
+ invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia->ri_device, seg++);
+ ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
read_lock(&ia->ri_qplock);
rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
read_unlock(&ia->ri_qplock);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f0c3ff67ca98..ff4f01e527ec 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
u64 rs_offset,
bool last)
{
- struct ib_send_wr read_wr;
+ struct ib_rdma_wr read_wr;
int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
int ret, read, pno;
@@ -180,16 +180,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
memset(&read_wr, 0, sizeof(read_wr));
- read_wr.wr_id = (unsigned long)ctxt;
- read_wr.opcode = IB_WR_RDMA_READ;
- ctxt->wr_op = read_wr.opcode;
- read_wr.send_flags = IB_SEND_SIGNALED;
- read_wr.wr.rdma.rkey = rs_handle;
- read_wr.wr.rdma.remote_addr = rs_offset;
- read_wr.sg_list = ctxt->sge;
- read_wr.num_sge = pages_needed;
-
- ret = svc_rdma_send(xprt, &read_wr);
+ read_wr.wr.wr_id = (unsigned long)ctxt;
+ read_wr.wr.opcode = IB_WR_RDMA_READ;
+ ctxt->wr_op = read_wr.wr.opcode;
+ read_wr.wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.rkey = rs_handle;
+ read_wr.remote_addr = rs_offset;
+ read_wr.wr.sg_list = ctxt->sge;
+ read_wr.wr.num_sge = pages_needed;
+
+ ret = svc_rdma_send(xprt, &read_wr.wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -219,14 +219,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
u64 rs_offset,
bool last)
{
- struct ib_send_wr read_wr;
+ struct ib_rdma_wr read_wr;
struct ib_send_wr inv_wr;
- struct ib_send_wr fastreg_wr;
+ struct ib_reg_wr reg_wr;
u8 key;
- int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+ int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
- int ret, read, pno;
+ int ret, read, pno, dma_nents, n;
u32 pg_off = *page_offset;
u32 pg_no = *page_no;
@@ -235,17 +235,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
ctxt->direction = DMA_FROM_DEVICE;
ctxt->frmr = frmr;
- pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
- read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
- rs_length);
+ nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
+ read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
- frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
frmr->direction = DMA_FROM_DEVICE;
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
- frmr->map_len = pages_needed << PAGE_SHIFT;
- frmr->page_list_len = pages_needed;
+ frmr->sg_nents = nents;
- for (pno = 0; pno < pages_needed; pno++) {
+ for (pno = 0; pno < nents; pno++) {
int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
@@ -253,17 +250,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
head->arg.len += len;
if (!pg_off)
head->count++;
+
+ sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
+ len, pg_off);
+
rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
- frmr->page_list->page_list[pno] =
- ib_dma_map_page(xprt->sc_cm_id->device,
- head->arg.pages[pg_no], 0,
- PAGE_SIZE, DMA_FROM_DEVICE);
- ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[pno]);
- if (ret)
- goto err;
- atomic_inc(&xprt->sc_dma_used);
/* adjust offset and wrap to next page if needed */
pg_off += len;
@@ -279,43 +271,57 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
else
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
+ frmr->sg, frmr->sg_nents,
+ frmr->direction);
+ if (!dma_nents) {
+ pr_err("svcrdma: failed to dma map sg %p\n",
+ frmr->sg);
+ return -ENOMEM;
+ }
+ atomic_inc(&xprt->sc_dma_used);
+
+ n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+ if (unlikely(n != frmr->sg_nents)) {
+ pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
+ frmr->mr, n, frmr->sg_nents);
+ return n < 0 ? n : -EINVAL;
+ }
+
/* Bump the key */
key = (u8)(frmr->mr->lkey & 0x000000FF);
ib_update_fast_reg_key(frmr->mr, ++key);
- ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+ ctxt->sge[0].addr = frmr->mr->iova;
ctxt->sge[0].lkey = frmr->mr->lkey;
- ctxt->sge[0].length = read;
+ ctxt->sge[0].length = frmr->mr->length;
ctxt->count = 1;
ctxt->read_hdr = head;
- /* Prepare FASTREG WR */
- memset(&fastreg_wr, 0, sizeof(fastreg_wr));
- fastreg_wr.opcode = IB_WR_FAST_REG_MR;
- fastreg_wr.send_flags = IB_SEND_SIGNALED;
- fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
- fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
- fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
- fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- fastreg_wr.wr.fast_reg.length = frmr->map_len;
- fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
- fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
- fastreg_wr.next = &read_wr;
+ /* Prepare REG WR */
+ reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg_wr.wr.wr_id = 0;
+ reg_wr.wr.send_flags = IB_SEND_SIGNALED;
+ reg_wr.wr.num_sge = 0;
+ reg_wr.mr = frmr->mr;
+ reg_wr.key = frmr->mr->lkey;
+ reg_wr.access = frmr->access_flags;
+ reg_wr.wr.next = &read_wr.wr;
/* Prepare RDMA_READ */
memset(&read_wr, 0, sizeof(read_wr));
- read_wr.send_flags = IB_SEND_SIGNALED;
- read_wr.wr.rdma.rkey = rs_handle;
- read_wr.wr.rdma.remote_addr = rs_offset;
- read_wr.sg_list = ctxt->sge;
- read_wr.num_sge = 1;
+ read_wr.wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.rkey = rs_handle;
+ read_wr.remote_addr = rs_offset;
+ read_wr.wr.sg_list = ctxt->sge;
+ read_wr.wr.num_sge = 1;
if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
- read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
- read_wr.wr_id = (unsigned long)ctxt;
- read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+ read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ read_wr.wr.wr_id = (unsigned long)ctxt;
+ read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
} else {
- read_wr.opcode = IB_WR_RDMA_READ;
- read_wr.next = &inv_wr;
+ read_wr.wr.opcode = IB_WR_RDMA_READ;
+ read_wr.wr.next = &inv_wr;
/* Prepare invalidate */
memset(&inv_wr, 0, sizeof(inv_wr));
inv_wr.wr_id = (unsigned long)ctxt;
@@ -323,10 +329,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
}
- ctxt->wr_op = read_wr.opcode;
+ ctxt->wr_op = read_wr.wr.opcode;
/* Post the chain */
- ret = svc_rdma_send(xprt, &fastreg_wr);
+ ret = svc_rdma_send(xprt, &reg_wr.wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -340,7 +346,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
atomic_inc(&rdma_stat_read);
return ret;
err:
- svc_rdma_unmap_dma(ctxt);
+ ib_dma_unmap_sg(xprt->sc_cm_id->device,
+ frmr->sg, frmr->sg_nents, frmr->direction);
svc_rdma_put_context(ctxt, 0);
svc_rdma_put_frmr(xprt, frmr);
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1dfae8317065..969a1ab75fc3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
u32 xdr_off, int write_len,
struct svc_rdma_req_map *vec)
{
- struct ib_send_wr write_wr;
+ struct ib_rdma_wr write_wr;
struct ib_sge *sge;
int xdr_sge_no;
int sge_no;
@@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
/* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr);
ctxt->wr_op = IB_WR_RDMA_WRITE;
- write_wr.wr_id = (unsigned long)ctxt;
- write_wr.sg_list = &sge[0];
- write_wr.num_sge = sge_no;
- write_wr.opcode = IB_WR_RDMA_WRITE;
- write_wr.send_flags = IB_SEND_SIGNALED;
- write_wr.wr.rdma.rkey = rmr;
- write_wr.wr.rdma.remote_addr = to;
+ write_wr.wr.wr_id = (unsigned long)ctxt;
+ write_wr.wr.sg_list = &sge[0];
+ write_wr.wr.num_sge = sge_no;
+ write_wr.wr.opcode = IB_WR_RDMA_WRITE;
+ write_wr.wr.send_flags = IB_SEND_SIGNALED;
+ write_wr.rkey = rmr;
+ write_wr.remote_addr = to;
/* Post It */
atomic_inc(&rdma_stat_write);
- if (svc_rdma_send(xprt, &write_wr))
+ if (svc_rdma_send(xprt, &write_wr.wr))
goto err;
return write_len - bc;
err:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fcc3eb80c265..a266e870d870 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -692,8 +692,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
- listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
- IB_QPT_RC);
+ listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
@@ -732,7 +732,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
{
struct ib_mr *mr;
- struct ib_fast_reg_page_list *pl;
+ struct scatterlist *sg;
struct svc_rdma_fastreg_mr *frmr;
u32 num_sg;
@@ -745,13 +745,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
if (IS_ERR(mr))
goto err_free_frmr;
- pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
- num_sg);
- if (IS_ERR(pl))
+ sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
+ if (!sg)
goto err_free_mr;
+ sg_init_table(sg, RPCSVC_MAXPAGES);
+
frmr->mr = mr;
- frmr->page_list = pl;
+ frmr->sg = sg;
INIT_LIST_HEAD(&frmr->frmr_list);
return frmr;
@@ -771,8 +772,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
frmr = list_entry(xprt->sc_frmr_q.next,
struct svc_rdma_fastreg_mr, frmr_list);
list_del_init(&frmr->frmr_list);
+ kfree(frmr->sg);
ib_dereg_mr(frmr->mr);
- ib_free_fast_reg_page_list(frmr->page_list);
kfree(frmr);
}
}
@@ -786,8 +787,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
frmr = list_entry(rdma->sc_frmr_q.next,
struct svc_rdma_fastreg_mr, frmr_list);
list_del_init(&frmr->frmr_list);
- frmr->map_len = 0;
- frmr->page_list_len = 0;
+ frmr->sg_nents = 0;
}
spin_unlock_bh(&rdma->sc_frmr_q_lock);
if (frmr)
@@ -796,25 +796,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
return rdma_alloc_frmr(rdma);
}
-static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
- struct svc_rdma_fastreg_mr *frmr)
-{
- int page_no;
- for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
- dma_addr_t addr = frmr->page_list->page_list[page_no];
- if (ib_dma_mapping_error(frmr->mr->device, addr))
- continue;
- atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
- frmr->direction);
- }
-}
-
void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
struct svc_rdma_fastreg_mr *frmr)
{
if (frmr) {
- frmr_unmap_dma(rdma, frmr);
+ ib_dma_unmap_sg(rdma->sc_cm_id->device,
+ frmr->sg, frmr->sg_nents, frmr->direction);
+ atomic_dec(&rdma->sc_dma_used);
spin_lock_bh(&rdma->sc_frmr_q_lock);
WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5502d4dade74..f63369bd01c5 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -432,7 +432,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
init_completion(&ia->ri_done);
- id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
+ id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
+ IB_QPT_RC);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
dprintk("RPC: %s: rdma_create_id() failed %i\n",
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c09414e6f91b..c82abf44e39d 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -193,7 +193,8 @@ enum rpcrdma_frmr_state {
};
struct rpcrdma_frmr {
- struct ib_fast_reg_page_list *fr_pgl;
+ struct scatterlist *sg;
+ int sg_nents;
struct ib_mr *fr_mr;
enum rpcrdma_frmr_state fr_state;
struct work_struct fr_work;