diff options
author | David S. Miller <davem@davemloft.net> | 2008-07-18 02:39:39 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-07-18 02:39:39 -0700 |
commit | 49997d75152b3d23c53b0fa730599f2f74c92c65 (patch) | |
tree | 46e93126170d02cfec9505172e545732c1b69656 /drivers/infiniband/ulp | |
parent | a0c80b80e0fb48129e4e9d6a9ede914f9ff1850d (diff) | |
parent | 5b664cb235e97afbf34db9c4d77f08ebd725335e (diff) | |
download | linux-stable-49997d75152b3d23c53b0fa730599f2f74c92c65.tar.gz linux-stable-49997d75152b3d23c53b0fa730599f2f74c92c65.tar.bz2 linux-stable-49997d75152b3d23c53b0fa730599f2f74c92c65.zip |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
Documentation/powerpc/booting-without-of.txt
drivers/atm/Makefile
drivers/net/fs_enet/fs_enet-main.c
drivers/pci/pci-acpi.c
net/8021q/vlan.c
net/iucv/iucv.c
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/Kconfig | 1 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 48 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 104 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 46 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_fs.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ib.c | 52 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 115 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 27 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 69 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.c | 359 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.h | 46 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_initiator.c | 211 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_memory.c | 79 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_verbs.c | 30 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 15 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.h | 2 |
17 files changed, 727 insertions, 481 deletions
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 1f76bad020f3..691525cf394a 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_IPOIB tristate "IP-over-InfiniBand" depends on NETDEVICES && INET && (IPV6 || IPV6=n) + select INET_LRO ---help--- Support for the IP-over-InfiniBand protocol (IPoIB). This transports IP packets over InfiniBand so you can use your IB diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca126fc2b853..b0ffc9abe8c0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $ */ #ifndef _IPOIB_H @@ -52,9 +50,16 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_pack.h> #include <rdma/ib_sa.h> +#include <linux/inet_lro.h> /* constants */ +enum ipoib_flush_level { + IPOIB_FLUSH_LIGHT, + IPOIB_FLUSH_NORMAL, + IPOIB_FLUSH_HEAVY +}; + enum { IPOIB_ENCAP_LEN = 4, @@ -65,8 +70,8 @@ enum { IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, - IPOIB_RX_RING_SIZE = 128, - IPOIB_TX_RING_SIZE = 64, + IPOIB_RX_RING_SIZE = 256, + IPOIB_TX_RING_SIZE = 128, IPOIB_MAX_QUEUE_SIZE = 8192, IPOIB_MIN_QUEUE_SIZE = 2, IPOIB_CM_MAX_CONN_QP = 4096, @@ -84,7 +89,6 @@ enum { IPOIB_FLAG_SUBINTERFACE = 5, IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, - IPOIB_MCAST_STARTED = 8, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, IPOIB_FLAG_CSUM = 11, @@ -96,7 +100,11 @@ enum { IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + IPOIB_MAX_LRO_DESCRIPTORS = 8, + IPOIB_LRO_MAX_AGGR = 64, + MAX_SEND_CQE = 16, + IPOIB_CM_COPYBREAK = 256, }; #define IPOIB_OP_RECV (1ul << 31) @@ -149,6 +157,11 @@ struct ipoib_tx_buf { u64 mapping[MAX_SKB_FRAGS + 1]; }; +struct ipoib_cm_tx_buf { + struct sk_buff *skb; + u64 mapping; +}; + struct ib_cm_id; struct ipoib_cm_data { @@ -207,7 +220,7 @@ struct ipoib_cm_tx { struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; - struct ipoib_tx_buf *tx_ring; + struct ipoib_cm_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; unsigned long flags; @@ -249,6 +262,11 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_lro { + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS]; +}; + /* * Device private locking: tx_lock protects members used in TX fast * path (and we use LLTX so upper layers don't do extra locking). @@ -264,7 +282,6 @@ struct ipoib_dev_priv { unsigned long flags; - struct mutex mcast_mutex; struct mutex vlan_mutex; struct rb_root path_tree; @@ -276,10 +293,11 @@ struct ipoib_dev_priv { struct delayed_work pkey_poll_task; struct delayed_work mcast_task; - struct work_struct flush_task; + struct work_struct flush_light; + struct work_struct flush_normal; + struct work_struct flush_heavy; struct work_struct restart_task; struct delayed_work ah_reap_task; - struct work_struct pkey_event_task; struct ib_device *ca; u8 port; @@ -335,6 +353,8 @@ struct ipoib_dev_priv { int hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; + + struct ipoib_lro lro; }; struct ipoib_ah { @@ -359,6 +379,7 @@ struct ipoib_path { struct rb_node rb_node; struct list_head list; + int valid; }; struct ipoib_neigh { @@ -423,11 +444,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); -void ipoib_ib_dev_flush(struct work_struct *work); +void ipoib_ib_dev_flush_light(struct work_struct *work); +void ipoib_ib_dev_flush_normal(struct work_struct *work); +void ipoib_ib_dev_flush_heavy(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); @@ -466,9 +490,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, #endif int ipoib_mcast_attach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); -int ipoib_mcast_detach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); + union ib_gid *mgid, int set_qkey); int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 97e67d36378f..0f2d3045061a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include <rdma/ib_cm.h> @@ -113,18 +111,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) } static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, - struct ipoib_cm_rx *rx, int id) + struct ipoib_cm_rx *rx, + struct ib_recv_wr *wr, + struct ib_sge *sge, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_recv_wr *bad_wr; int i, ret; - priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; + wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; for (i = 0; i < IPOIB_CM_RX_SG; ++i) - priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i]; + sge[i].addr = rx->rx_ring[id].mapping[i]; - ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr); + ret = ib_post_recv(rx->qp, wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, @@ -322,10 +322,33 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev, return 0; } +static void ipoib_cm_init_rx_wr(struct net_device *dev, + struct ib_recv_wr *wr, + struct ib_sge *sge) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < priv->cm.num_frags; ++i) + sge[i].lkey = priv->mr->lkey; + + sge[0].length = IPOIB_CM_HEAD_SIZE; + for (i = 1; i < priv->cm.num_frags; ++i) + sge[i].length = PAGE_SIZE; + + wr->next = NULL; + wr->sg_list = priv->cm.rx_sge; + wr->num_sge = priv->cm.num_frags; +} + static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) { struct ipoib_dev_priv *priv = netdev_priv(dev); + struct { + struct ib_recv_wr wr; + struct ib_sge sge[IPOIB_CM_RX_SG]; + } *t; int ret; int i; @@ -333,6 +356,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i if (!rx->rx_ring) return -ENOMEM; + t = kmalloc(sizeof *t, GFP_KERNEL); + if (!t) { + ret = -ENOMEM; + goto err_free; + } + + ipoib_cm_init_rx_wr(dev, &t->wr, t->sge); + spin_lock_irq(&priv->lock); if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { @@ -351,8 +382,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); ret = -ENOMEM; goto err_count; - } - ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); + } + ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i); if (ret) { ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " "failed for buf %d\n", i); @@ -363,6 +394,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i rx->recv_count = ipoib_recvq_size; + kfree(t); + return 0; err_count: @@ -371,6 +404,7 @@ err_count: spin_unlock_irq(&priv->lock); err_free: + kfree(t); ipoib_cm_free_rx_ring(dev, rx->rx_ring); return ret; @@ -525,6 +559,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) u64 mapping[IPOIB_CM_RX_SG]; int frags; int has_srq; + struct sk_buff *small_skb; ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -579,6 +614,23 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } } + if (wc->byte_len < IPOIB_CM_COPYBREAK) { + int dlen = wc->byte_len; + + small_skb = dev_alloc_skb(dlen + 12); + if (small_skb) { + skb_reserve(small_skb, 12); + ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0], + dlen, DMA_FROM_DEVICE); + skb_copy_from_linear_data(skb, small_skb->data, dlen); + ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0], + dlen, DMA_FROM_DEVICE); + skb_put(small_skb, dlen); + skb = small_skb; + goto copied; + } + } + frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; @@ -601,6 +653,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); +copied: skb->protocol = ((struct ipoib_header *) skb->data)->proto; skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); @@ -620,7 +673,10 @@ repost: ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " "for buf %d\n", wr_id); } else { - if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { + if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, + &priv->cm.rx_wr, + priv->cm.rx_sge, + wr_id))) { --p->recv_count; ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " "for buf %d\n", wr_id); @@ -647,7 +703,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; u64 addr; if (unlikely(skb->len > tx->mtu)) { @@ -678,7 +734,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ return; } - tx_req->mapping[0] = addr; + tx_req->mapping = addr; if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), addr, skb->len))) { @@ -703,7 +759,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx *tx = wc->qp->qp_context; unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; unsigned long flags; ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", @@ -717,7 +773,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &tx->tx_ring[wr_id]; - ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); /* FIXME: is this right? Shouldn't we only increment on success? */ ++dev->stats.tx_packets; @@ -1087,7 +1143,7 @@ err_tx: static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; unsigned long flags; unsigned long begin; @@ -1115,7 +1171,7 @@ timeout: while ((int) p->tx_tail - (int) p->tx_head < 0) { tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; - ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; @@ -1384,7 +1440,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); + rtnl_lock(); dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO); + rtnl_unlock(); priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); @@ -1393,14 +1451,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, if (!strcmp(buf, "datagram\n")) { clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); - dev->mtu = min(priv->mcast_mtu, dev->mtu); - ipoib_flush_paths(dev); + rtnl_lock(); if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) { dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; if (priv->hca_caps & IB_DEVICE_UD_TSO) dev->features |= NETIF_F_TSO; } + dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); + rtnl_unlock(); + ipoib_flush_paths(dev); return count; } @@ -1485,15 +1545,7 @@ int ipoib_cm_dev_init(struct net_device *dev) priv->cm.num_frags = IPOIB_CM_RX_SG; } - for (i = 0; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].lkey = priv->mr->lkey; - - priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; - for (i = 1; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].length = PAGE_SIZE; - priv->cm.rx_wr.next = NULL; - priv->cm.rx_wr.sg_list = priv->cm.rx_sge; - priv->cm.rx_wr.num_sge = priv->cm.num_frags; + ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge); if (ipoib_cm_has_srq(dev)) { for (i = 0; i < ipoib_recvq_size; ++i) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 10279b79c44d..66af5c1a76e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev, return 0; } +static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = { + "LRO aggregated", "LRO flushed", + "LRO avg aggr", "LRO no desc" +}; + +static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + switch (stringset) { + case ETH_SS_STATS: + memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys)); + break; + } +} + +static int ipoib_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ipoib_stats_keys); + default: + return -EOPNOTSUPP; + } +} + +static void ipoib_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, uint64_t *data) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int index = 0; + + /* Get LRO statistics */ + data[index++] = priv->lro.lro_mgr.stats.aggregated; + data[index++] = priv->lro.lro_mgr.stats.flushed; + if (priv->lro.lro_mgr.stats.flushed) + data[index++] = priv->lro.lro_mgr.stats.aggregated / + priv->lro.lro_mgr.stats.flushed; + else + data[index++] = 0; + data[index++] = priv->lro.lro_mgr.stats.no_desc; +} + static const struct ethtool_ops ipoib_ethtool_ops = { .get_drvinfo = ipoib_get_drvinfo, .get_tso = ethtool_op_get_tso, .get_coalesce = ipoib_get_coalesce, .set_coalesce = ipoib_set_coalesce, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, + .get_strings = ipoib_get_strings, + .get_sset_count = ipoib_get_sset_count, + .get_ethtool_stats = ipoib_get_ethtool_stats, }; void ipoib_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 8b882bbd1d05..961c585da216 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $ */ #include <linux/err.h> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f429bce24c20..66cafa20c246 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $ */ #include <linux/delay.h> @@ -290,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) skb->ip_summed = CHECKSUM_UNNECESSARY; - netif_receive_skb(skb); + if (dev->features & NETIF_F_LRO) + lro_receive_skb(&priv->lro.lro_mgr, skb, NULL); + else + netif_receive_skb(skb); repost: if (unlikely(ipoib_ib_post_receive(dev, wr_id))) @@ -442,6 +443,9 @@ poll_more: } if (done < budget) { + if (dev->features & NETIF_F_LRO) + lro_flush_all(&priv->lro.lro_mgr); + netif_rx_complete(dev, napi); if (unlikely(ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | @@ -898,7 +902,8 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) return 0; } -static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) +static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, + enum ipoib_flush_level level) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; @@ -911,7 +916,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) * the parent is down. */ list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, pkey_event); + __ipoib_ib_dev_flush(cpriv, level); mutex_unlock(&priv->vlan_mutex); @@ -925,7 +930,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) return; } - if (pkey_event) { + if (level == IPOIB_FLUSH_HEAVY) { if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ipoib_ib_dev_down(dev, 0); @@ -943,11 +948,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) priv->pkey_index = new_index; } - ipoib_dbg(priv, "flushing\n"); + if (level == IPOIB_FLUSH_LIGHT) { + ipoib_mark_paths_invalid(dev); + ipoib_mcast_dev_flush(dev); + } - ipoib_ib_dev_down(dev, 0); + if (level >= IPOIB_FLUSH_NORMAL) + ipoib_ib_dev_down(dev, 0); - if (pkey_event) { + if (level == IPOIB_FLUSH_HEAVY) { ipoib_ib_dev_stop(dev, 0); ipoib_ib_dev_open(dev); } @@ -957,27 +966,34 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) * we get here, don't bring it back up if it's not configured up */ if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { - ipoib_ib_dev_up(dev); + if (level >= IPOIB_FLUSH_NORMAL) + ipoib_ib_dev_up(dev); ipoib_mcast_restart_task(&priv->restart_task); } } -void ipoib_ib_dev_flush(struct work_struct *work) +void ipoib_ib_dev_flush_light(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, flush_light); + + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); +} + +void ipoib_ib_dev_flush_normal(struct work_struct *work) { struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, flush_task); + container_of(work, struct ipoib_dev_priv, flush_normal); - ipoib_dbg(priv, "Flushing %s\n", priv->dev->name); - __ipoib_ib_dev_flush(priv, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); } -void ipoib_pkey_event(struct work_struct *work) +void ipoib_ib_dev_flush_heavy(struct work_struct *work) { struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, pkey_event_task); + container_of(work, struct ipoib_dev_priv, flush_heavy); - ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name); - __ipoib_ib_dev_flush(priv, 1); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); } void ipoib_ib_dev_cleanup(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2442090ac8d1..8be9ea0436e6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $ */ #include "ipoib.h" @@ -62,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); +static int lro; +module_param(lro, bool, 0444); +MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)"); + +static int lro_max_aggr = IPOIB_LRO_MAX_AGGR; +module_param(lro_max_aggr, int, 0644); +MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated " + "(default = 64)"); + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level; @@ -350,6 +357,23 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ +void ipoib_mark_paths_invalid(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_path *path, *tp; + + spin_lock_irq(&priv->lock); + + list_for_each_entry_safe(path, tp, &priv->path_list, list) { + ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n", + be16_to_cpu(path->pathrec.dlid), + IPOIB_GID_ARG(path->pathrec.dgid)); + path->valid = 0; + } + + spin_unlock_irq(&priv->lock); +} + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -386,6 +410,7 @@ static void path_rec_completion(int status, struct net_device *dev = path->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah = NULL; + struct ipoib_ah *old_ah; struct ipoib_neigh *neigh, *tn; struct sk_buff_head skqueue; struct sk_buff *skb; @@ -409,6 +434,7 @@ static void path_rec_completion(int status, spin_lock_irqsave(&priv->lock, flags); + old_ah = path->ah; path->ah = ah; if (ah) { @@ -421,6 +447,17 @@ static void path_rec_completion(int status, __skb_queue_tail(&skqueue, skb); list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { + if (neigh->ah) { + WARN_ON(neigh->ah != old_ah); + /* + * Dropping the ah reference inside + * priv->lock is safe here, because we + * will hold one more reference from + * the original value of path->ah (ie + * old_ah). + */ + ipoib_put_ah(neigh->ah); + } kref_get(&path->ah->ref); neigh->ah = path->ah; memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, @@ -443,6 +480,7 @@ static void path_rec_completion(int status, while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } + path->valid = 1; } path->query = NULL; @@ -450,6 +488,9 @@ static void path_rec_completion(int status, spin_unlock_irqrestore(&priv->lock, flags); + if (old_ah) + ipoib_put_ah(old_ah); + while ((skb = __skb_dequeue(&skqueue))) { skb->dev = dev; if (dev_queue_xmit(skb)) @@ -623,8 +664,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_lock(&priv->lock); path = __path_find(dev, phdr->hwaddr + 4); - if (!path) { - path = path_rec_create(dev, phdr->hwaddr + 4); + if (!path || !path->valid) { + if (!path) + path = path_rec_create(dev, phdr->hwaddr + 4); if (path) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof *phdr); @@ -938,6 +980,54 @@ static const struct header_ops ipoib_header_ops = { .create = ipoib_hard_header, }; +static int get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + unsigned int ip_len; + struct iphdr *iph; + + if (unlikely(skb->protocol != htons(ETH_P_IP))) + return -1; + + /* + * In the future we may add an else clause that verifies the + * checksum and allows devices which do not calculate checksum + * to use LRO. + */ + if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY)) + return -1; + + /* Check for non-TCP packet */ + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + /* check if IP header and TCP header are complete */ + if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb)) + return -1; + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + + return 0; +} + +static void ipoib_lro_setup(struct ipoib_dev_priv *priv) +{ + priv->lro.lro_mgr.max_aggr = lro_max_aggr; + priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS; + priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc; + priv->lro.lro_mgr.get_skb_header = get_skb_hdr; + priv->lro.lro_mgr.features = LRO_F_NAPI; + priv->lro.lro_mgr.dev = priv->dev; + priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; +} + static void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -977,10 +1067,11 @@ static void ipoib_setup(struct net_device *dev) priv->dev = dev; + ipoib_lro_setup(priv); + spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); - mutex_init(&priv->mcast_mutex); mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); @@ -989,9 +1080,10 @@ static void ipoib_setup(struct net_device *dev) INIT_LIST_HEAD(&priv->multicast_list); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); - INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); - INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); + INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); + INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); + INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); } @@ -1154,6 +1246,9 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; } + if (lro) + priv->dev->features |= NETIF_F_LRO; + /* * Set the full membership bit, so that we join the right * broadcast group, etc. @@ -1304,6 +1399,12 @@ static int __init ipoib_init_module(void) ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif + /* + * When copying small received packets, we only copy from the + * linear data part of the SKB, so we rely on this condition. + */ + BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE); + ret = ipoib_register_debugfs(); if (ret) return ret; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index cd2fb955040f..8950e9546f4e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ */ #include <linux/skbuff.h> @@ -188,6 +186,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah; int ret; + int set_qkey = 0; mcast->mcmember = *mcmember; @@ -202,6 +201,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); priv->tx_wr.wr.ud.remote_qkey = priv->qkey; + set_qkey = 1; } if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { @@ -214,7 +214,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, } ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid); + &mcast->mcmember.mgid, set_qkey); if (ret < 0) { ipoib_warn(priv, "couldn't attach QP to multicast group " IPOIB_GID_FMT "\n", @@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work) priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); - if (!ipoib_cm_admin_enabled(dev)) - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + if (!ipoib_cm_admin_enabled(dev)) { + rtnl_lock(); + dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu)); + rtnl_unlock(); + } ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); @@ -594,10 +597,6 @@ int ipoib_mcast_start_thread(struct net_device *dev) queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); - spin_lock_irq(&priv->lock); - set_bit(IPOIB_MCAST_STARTED, &priv->flags); - spin_unlock_irq(&priv->lock); - return 0; } @@ -607,10 +606,6 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush) ipoib_dbg_mcast(priv, "stopping multicast thread\n"); - spin_lock_irq(&priv->lock); - clear_bit(IPOIB_MCAST_STARTED, &priv->flags); - spin_unlock_irq(&priv->lock); - mutex_lock(&mcast_mutex); clear_bit(IPOIB_MCAST_RUN, &priv->flags); cancel_delayed_work(&priv->mcast_task); @@ -635,10 +630,10 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) IPOIB_GID_ARG(mcast->mcmember.mgid)); /* Remove ourselves from the multicast group */ - ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid); + ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid, + be16_to_cpu(mcast->mcmember.mlid)); if (ret) - ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); + ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); } return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 8766d29ce3b7..68325119f740 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -29,24 +29,17 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ */ #include "ipoib.h" -int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) +int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_qp_attr *qp_attr; + struct ib_qp_attr *qp_attr = NULL; int ret; u16 pkey_index; - ret = -ENOMEM; - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); - if (!qp_attr) - goto out; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = -ENXIO; @@ -54,18 +47,23 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); - /* set correct QKey for QP */ - qp_attr->qkey = priv->qkey; - ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); - if (ret) { - ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); - goto out; + if (set_qkey) { + ret = -ENOMEM; + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + goto out; + + /* set correct QKey for QP */ + qp_attr->qkey = priv->qkey; + ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); + if (ret) { + ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); + goto out; + } } /* attach QP to multicast group */ - mutex_lock(&priv->mcast_mutex); ret = ib_attach_mcast(priv->qp, mgid, mlid); - mutex_unlock(&priv->mcast_mutex); if (ret) ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); @@ -74,20 +72,6 @@ out: return ret; } -int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - int ret; - - mutex_lock(&priv->mcast_mutex); - ret = ib_detach_mcast(priv->qp, mgid, mlid); - mutex_unlock(&priv->mcast_mutex); - if (ret) - ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); - - return ret; -} - int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -201,7 +185,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.recv_cq = priv->recv_cq; if (priv->hca_caps & IB_DEVICE_UD_TSO) - init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; + init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + + if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) + init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; if (dev->features & NETIF_F_SG) init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; @@ -289,15 +276,17 @@ void ipoib_event(struct ib_event_handler *handler, if (record->element.port_num != priv->port) return; - if (record->event == IB_EVENT_PORT_ERR || - record->event == IB_EVENT_PORT_ACTIVE || - record->event == IB_EVENT_LID_CHANGE || - record->event == IB_EVENT_SM_CHANGE || + ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event, + record->device->name, record->element.port_num); + + if (record->event == IB_EVENT_SM_CHANGE || record->event == IB_EVENT_CLIENT_REREGISTER) { - ipoib_dbg(priv, "Port state change event\n"); - queue_work(ipoib_workqueue, &priv->flush_task); + queue_work(ipoib_workqueue, &priv->flush_light); + } else if (record->event == IB_EVENT_PORT_ERR || + record->event == IB_EVENT_PORT_ACTIVE || + record->event == IB_EVENT_LID_CHANGE) { + queue_work(ipoib_workqueue, &priv->flush_normal); } else if (record->event == IB_EVENT_PKEY_CHANGE) { - ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port); - queue_work(ipoib_workqueue, &priv->pkey_event_task); + queue_work(ipoib_workqueue, &priv->flush_heavy); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 1cdb5cfb0ff1..b08eb56196d3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/module.h> diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index aeb58cae9a3f..5a1cf2580e16 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -42,9 +42,6 @@ * Zhenyu Wang * Modified by: * Erez Zilber - * - * - * $Id: iscsi_iser.c 6965 2006-05-07 11:36:20Z ogerlitz $ */ #include <linux/types.h> @@ -74,6 +71,10 @@ #include "iscsi_iser.h" +static struct scsi_host_template iscsi_iser_sht; +static struct iscsi_transport iscsi_iser_transport; +static struct scsi_transport_template *iscsi_iser_scsi_transport; + static unsigned int iscsi_max_lun = 512; module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); @@ -94,7 +95,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, char *rx_data, int rx_data_len) { int rc = 0; - uint32_t ret_itt; int datalen; int ahslen; @@ -110,12 +110,7 @@ iscsi_iser_recv(struct iscsi_conn *conn, /* read AHS */ ahslen = hdr->hlength * 4; - /* verify itt (itt encoding: age+cid+itt) */ - rc = iscsi_verify_itt(conn, hdr, &ret_itt); - - if (!rc) - rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); - + rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); if (rc && rc != ISCSI_ERR_NO_SCSI_CMD) goto error; @@ -126,25 +121,33 @@ error: /** - * iscsi_iser_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands + * iscsi_iser_task_init - Initialize task + * @task: iscsi task * - **/ + * Initialize the task for the scsi command or mgmt command. + */ static int -iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) +iscsi_iser_task_init(struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_conn *iser_conn = task->conn->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; - iser_ctask->command_sent = 0; - iser_ctask->iser_conn = iser_conn; - iser_ctask_rdma_init(iser_ctask); + /* mgmt task */ + if (!task->sc) { + iser_task->desc.data = task->data; + return 0; + } + + iser_task->command_sent = 0; + iser_task->iser_conn = iser_conn; + iser_task_rdma_init(iser_task); return 0; } /** - * iscsi_mtask_xmit - xmit management(immediate) task + * iscsi_iser_mtask_xmit - xmit management(immediate) task * @conn: iscsi connection - * @mtask: task management task + * @task: task management task * * Notes: * The function can return -EAGAIN in which case caller must @@ -153,20 +156,19 @@ iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) * **/ static int -iscsi_iser_mtask_xmit(struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask) +iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) { int error = 0; - debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt); + debug_scsi("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); - error = iser_send_control(conn, mtask); + error = iser_send_control(conn, task); - /* since iser xmits control with zero copy, mtasks can not be recycled + /* since iser xmits control with zero copy, tasks can not be recycled * right after sending them. * The recycling scheme is based on whether a response is expected - * - if yes, the mtask is recycled at iscsi_complete_pdu - * - if no, the mtask is recycled at iser_snd_completion + * - if yes, the task is recycled at iscsi_complete_pdu + * - if no, the task is recycled at iser_snd_completion */ if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); @@ -175,97 +177,86 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, } static int -iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) +iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn, + struct iscsi_task *task) { struct iscsi_data hdr; int error = 0; /* Send data-out PDUs while there's still unsolicited data to send */ - while (ctask->unsol_count > 0) { - iscsi_prep_unsolicit_data_pdu(ctask, &hdr); + while (task->unsol_count > 0) { + iscsi_prep_unsolicit_data_pdu(task, &hdr); debug_scsi("Sending data-out: itt 0x%x, data count %d\n", - hdr.itt, ctask->data_count); + hdr.itt, task->data_count); /* the buffer description has been passed with the command */ /* Send the command */ - error = iser_send_data_out(conn, ctask, &hdr); + error = iser_send_data_out(conn, task, &hdr); if (error) { - ctask->unsol_datasn--; - goto iscsi_iser_ctask_xmit_unsol_data_exit; + task->unsol_datasn--; + goto iscsi_iser_task_xmit_unsol_data_exit; } - ctask->unsol_count -= ctask->data_count; + task->unsol_count -= task->data_count; debug_scsi("Need to send %d more as data-out PDUs\n", - ctask->unsol_count); + task->unsol_count); } -iscsi_iser_ctask_xmit_unsol_data_exit: +iscsi_iser_task_xmit_unsol_data_exit: return error; } static int -iscsi_iser_ctask_xmit(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) +iscsi_iser_task_xmit(struct iscsi_task *task) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_conn *conn = task->conn; + struct iscsi_iser_task *iser_task = task->dd_data; int error = 0; - if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) { - BUG_ON(scsi_bufflen(ctask->sc) == 0); + if (!task->sc) + return iscsi_iser_mtask_xmit(conn, task); + + if (task->sc->sc_data_direction == DMA_TO_DEVICE) { + BUG_ON(scsi_bufflen(task->sc) == 0); debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n", - ctask->itt, scsi_bufflen(ctask->sc), - ctask->imm_count, ctask->unsol_count); + task->itt, scsi_bufflen(task->sc), + task->imm_count, task->unsol_count); } - debug_scsi("ctask deq [cid %d itt 0x%x]\n", - conn->id, ctask->itt); + debug_scsi("task deq [cid %d itt 0x%x]\n", + conn->id, task->itt); /* Send the cmd PDU */ - if (!iser_ctask->command_sent) { - error = iser_send_command(conn, ctask); + if (!iser_task->command_sent) { + error = iser_send_command(conn, task); if (error) - goto iscsi_iser_ctask_xmit_exit; - iser_ctask->command_sent = 1; + goto iscsi_iser_task_xmit_exit; + iser_task->command_sent = 1; } /* Send unsolicited data-out PDU(s) if necessary */ - if (ctask->unsol_count) - error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); + if (task->unsol_count) + error = iscsi_iser_task_xmit_unsol_data(conn, task); - iscsi_iser_ctask_xmit_exit: + iscsi_iser_task_xmit_exit: if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; } static void -iscsi_iser_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +iscsi_iser_cleanup_task(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; - - if (iser_ctask->status == ISER_TASK_STATUS_STARTED) { - iser_ctask->status = ISER_TASK_STATUS_COMPLETED; - iser_ctask_rdma_finalize(iser_ctask); - } -} + struct iscsi_iser_task *iser_task = task->dd_data; -static struct iser_conn * -iscsi_iser_ib_conn_lookup(__u64 ep_handle) -{ - struct iser_conn *ib_conn; - struct iser_conn *uib_conn = (struct iser_conn *)(unsigned long)ep_handle; + /* mgmt tasks do not need special cleanup */ + if (!task->sc) + return; - mutex_lock(&ig.connlist_mutex); - list_for_each_entry(ib_conn, &ig.connlist, conn_list) { - if (ib_conn == uib_conn) { - mutex_unlock(&ig.connlist_mutex); - return ib_conn; - } + if (iser_task->status == ISER_TASK_STATUS_STARTED) { + iser_task->status = ISER_TASK_STATUS_COMPLETED; + iser_task_rdma_finalize(iser_task); } - mutex_unlock(&ig.connlist_mutex); - iser_err("no conn exists for eph %llx\n",(unsigned long long)ep_handle); - return NULL; } static struct iscsi_cls_conn * @@ -275,7 +266,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) struct iscsi_cls_conn *cls_conn; struct iscsi_iser_conn *iser_conn; - cls_conn = iscsi_conn_setup(cls_session, conn_idx); + cls_conn = iscsi_conn_setup(cls_session, sizeof(*iser_conn), conn_idx); if (!cls_conn) return NULL; conn = cls_conn->dd_data; @@ -286,21 +277,11 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) */ conn->max_recv_dlength = 128; - iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL); - if (!iser_conn) - goto conn_alloc_fail; - - /* currently this is the only field which need to be initiated */ - rwlock_init(&iser_conn->lock); - + iser_conn = conn->dd_data; conn->dd_data = iser_conn; iser_conn->iscsi_conn = conn; return cls_conn; - -conn_alloc_fail: - iscsi_conn_teardown(cls_conn); - return NULL; } static void @@ -308,11 +289,18 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = iser_conn->ib_conn; iscsi_conn_teardown(cls_conn); - if (iser_conn->ib_conn) - iser_conn->ib_conn->iser_conn = NULL; - kfree(iser_conn); + /* + * Userspace will normally call the stop callback and + * already have freed the ib_conn, but if it goofed up then + * we free it here. + */ + if (ib_conn) { + ib_conn->iser_conn = NULL; + iser_conn_put(ib_conn); + } } static int @@ -323,6 +311,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_iser_conn *iser_conn; struct iser_conn *ib_conn; + struct iscsi_endpoint *ep; int error; error = iscsi_conn_bind(cls_session, cls_conn, is_leading); @@ -331,12 +320,14 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, /* the transport ep handle comes from user space so it must be * verified against the global ib connections list */ - ib_conn = iscsi_iser_ib_conn_lookup(transport_eph); - if (!ib_conn) { + ep = iscsi_lookup_endpoint(transport_eph); + if (!ep) { iser_err("can't bind eph %llx\n", (unsigned long long)transport_eph); return -EINVAL; } + ib_conn = ep->dd_data; + /* binds the iSER connection retrieved from the previously * connected ep_handle to the iSCSI layer connection. exchanges * connection pointers */ @@ -344,10 +335,30 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, iser_conn = conn->dd_data; ib_conn->iser_conn = iser_conn; iser_conn->ib_conn = ib_conn; + iser_conn_get(ib_conn); + return 0; +} - conn->recv_lock = &iser_conn->lock; +static void +iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = iser_conn->ib_conn; - return 0; + /* + * Userspace may have goofed up and not bound the connection or + * might have only partially setup the connection. + */ + if (ib_conn) { + iscsi_conn_stop(cls_conn, flag); + /* + * There is no unbind event so the stop callback + * must release the ref from the bind. + */ + iser_conn_put(ib_conn); + } + iser_conn->ib_conn = NULL; } static int @@ -363,55 +374,75 @@ iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) return iscsi_conn_start(cls_conn); } -static struct iscsi_transport iscsi_iser_transport; +static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) +{ + struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); + + iscsi_host_remove(shost); + iscsi_host_free(shost); +} static struct iscsi_cls_session * -iscsi_iser_session_create(struct iscsi_transport *iscsit, - struct scsi_transport_template *scsit, - uint16_t cmds_max, uint16_t qdepth, - uint32_t initial_cmdsn, uint32_t *hostno) +iscsi_iser_session_create(struct iscsi_endpoint *ep, + uint16_t cmds_max, uint16_t qdepth, + uint32_t initial_cmdsn, uint32_t *hostno) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; + struct Scsi_Host *shost; int i; - uint32_t hn; - struct iscsi_cmd_task *ctask; - struct iscsi_mgmt_task *mtask; - struct iscsi_iser_cmd_task *iser_ctask; - struct iser_desc *desc; + struct iscsi_task *task; + struct iscsi_iser_task *iser_task; + struct iser_conn *ib_conn; + + shost = iscsi_host_alloc(&iscsi_iser_sht, 0, ISCSI_MAX_CMD_PER_LUN); + if (!shost) + return NULL; + shost->transportt = iscsi_iser_scsi_transport; + shost->max_lun = iscsi_max_lun; + shost->max_id = 0; + shost->max_channel = 0; + shost->max_cmd_len = 16; + + /* + * older userspace tools (before 2.0-870) did not pass us + * the leading conn's ep so this will be NULL; + */ + if (ep) + ib_conn = ep->dd_data; + + if (iscsi_host_add(shost, + ep ? ib_conn->device->ib_device->dma_device : NULL)) + goto free_host; + *hostno = shost->host_no; /* * we do not support setting can_queue cmd_per_lun from userspace yet * because we preallocate so many resources */ - cls_session = iscsi_session_setup(iscsit, scsit, + cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, ISCSI_DEF_XMIT_CMDS_MAX, - ISCSI_MAX_CMD_PER_LUN, - sizeof(struct iscsi_iser_cmd_task), - sizeof(struct iser_desc), - initial_cmdsn, &hn); + sizeof(struct iscsi_iser_task), + initial_cmdsn, 0); if (!cls_session) - return NULL; - - *hostno = hn; - session = class_to_transport_session(cls_session); + goto remove_host; + session = cls_session->dd_data; + shost->can_queue = session->scsi_cmds_max; /* libiscsi setup itts, data and pool so just set desc fields */ for (i = 0; i < session->cmds_max; i++) { - ctask = session->cmds[i]; - iser_ctask = ctask->dd_data; - ctask->hdr = (struct iscsi_cmd *)&iser_ctask->desc.iscsi_header; - ctask->hdr_max = sizeof(iser_ctask->desc.iscsi_header); - } - - for (i = 0; i < session->mgmtpool_max; i++) { - mtask = session->mgmt_cmds[i]; - desc = mtask->dd_data; - mtask->hdr = &desc->iscsi_header; - desc->data = mtask->data; + task = session->cmds[i]; + iser_task = task->dd_data; + task->hdr = (struct iscsi_cmd *)&iser_task->desc.iscsi_header; + task->hdr_max = sizeof(iser_task->desc.iscsi_header); } - return cls_session; + +remove_host: + iscsi_host_remove(shost); +free_host: + iscsi_host_free(shost); + return NULL; } static int @@ -484,34 +515,37 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s stats->custom[3].value = conn->fmr_unalign_cnt; } -static int -iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking, - __u64 *ep_handle) +static struct iscsi_endpoint * +iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking) { int err; struct iser_conn *ib_conn; + struct iscsi_endpoint *ep; - err = iser_conn_init(&ib_conn); - if (err) - goto out; + ep = iscsi_create_endpoint(sizeof(*ib_conn)); + if (!ep) + return ERR_PTR(-ENOMEM); - err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, non_blocking); - if (!err) - *ep_handle = (__u64)(unsigned long)ib_conn; + ib_conn = ep->dd_data; + ib_conn->ep = ep; + iser_conn_init(ib_conn); -out: - return err; + err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, + non_blocking); + if (err) { + iscsi_destroy_endpoint(ep); + return ERR_PTR(err); + } + return ep; } static int -iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms) +iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) { - struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); + struct iser_conn *ib_conn; int rc; - if (!ib_conn) - return -EINVAL; - + ib_conn = ep->dd_data; rc = wait_event_interruptible_timeout(ib_conn->wait, ib_conn->state == ISER_CONN_UP, msecs_to_jiffies(timeout_ms)); @@ -533,13 +567,21 @@ iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms) } static void -iscsi_iser_ep_disconnect(__u64 ep_handle) +iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) { struct iser_conn *ib_conn; - ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); - if (!ib_conn) - return; + ib_conn = ep->dd_data; + if (ib_conn->iser_conn) + /* + * Must suspend xmit path if the ep is bound to the + * iscsi_conn, so we know we are not accessing the ib_conn + * when we free it. + * + * This may not be bound if the ep poll failed. + */ + iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn); + iser_err("ib conn %p state %d\n",ib_conn, ib_conn->state); iser_conn_terminate(ib_conn); @@ -550,7 +592,6 @@ static struct scsi_host_template iscsi_iser_sht = { .name = "iSCSI Initiator over iSER, v." DRV_VER, .queuecommand = iscsi_queuecommand, .change_queue_depth = iscsi_change_queue_depth, - .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, .max_sectors = 1024, .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, @@ -584,17 +625,14 @@ static struct iscsi_transport iscsi_iser_transport = { ISCSI_USERNAME | ISCSI_PASSWORD | ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN | ISCSI_FAST_ABORT | ISCSI_ABORT_TMO | - ISCSI_PING_TMO | ISCSI_RECV_TMO, + ISCSI_PING_TMO | ISCSI_RECV_TMO | + ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME, .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_NETDEV_NAME | ISCSI_HOST_INITIATOR_NAME, - .host_template = &iscsi_iser_sht, - .conndata_size = sizeof(struct iscsi_conn), - .max_lun = ISCSI_ISER_MAX_LUN, - .max_cmd_len = ISCSI_ISER_MAX_CMD_LEN, /* session management */ .create_session = iscsi_iser_session_create, - .destroy_session = iscsi_session_teardown, + .destroy_session = iscsi_iser_session_destroy, /* connection management */ .create_conn = iscsi_iser_conn_create, .bind_conn = iscsi_iser_conn_bind, @@ -603,17 +641,16 @@ static struct iscsi_transport iscsi_iser_transport = { .get_conn_param = iscsi_conn_get_param, .get_session_param = iscsi_session_get_param, .start_conn = iscsi_iser_conn_start, - .stop_conn = iscsi_conn_stop, + .stop_conn = iscsi_iser_conn_stop, /* iscsi host params */ .get_host_param = iscsi_host_get_param, .set_host_param = iscsi_host_set_param, /* IO */ .send_pdu = iscsi_conn_send_pdu, .get_stats = iscsi_iser_conn_get_stats, - .init_cmd_task = iscsi_iser_cmd_init, - .xmit_cmd_task = iscsi_iser_ctask_xmit, - .xmit_mgmt_task = iscsi_iser_mtask_xmit, - .cleanup_cmd_task = iscsi_iser_cleanup_ctask, + .init_task = iscsi_iser_task_init, + .xmit_task = iscsi_iser_task_xmit, + .cleanup_task = iscsi_iser_cleanup_task, /* recovery */ .session_recovery_timedout = iscsi_session_recovery_timedout, @@ -633,8 +670,6 @@ static int __init iser_init(void) return -EINVAL; } - iscsi_iser_transport.max_lun = iscsi_max_lun; - memset(&ig, 0, sizeof(struct iser_global)); ig.desc_cache = kmem_cache_create("iser_descriptors", @@ -650,7 +685,9 @@ static int __init iser_init(void) mutex_init(&ig.connlist_mutex); INIT_LIST_HEAD(&ig.connlist); - if (!iscsi_register_transport(&iscsi_iser_transport)) { + iscsi_iser_scsi_transport = iscsi_register_transport( + &iscsi_iser_transport); + if (!iscsi_iser_scsi_transport) { iser_err("iscsi_register_transport failed\n"); err = -EINVAL; goto register_transport_failure; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a8c1b300e34d..81a82628a5f1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -36,8 +36,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $ */ #ifndef __ISCSI_ISER_H__ #define __ISCSI_ISER_H__ @@ -96,7 +94,6 @@ /* support upto 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) #define ISCSI_ISER_MAX_LUN 256 -#define ISCSI_ISER_MAX_CMD_LEN 16 /* QP settings */ /* Maximal bounds on received asynchronous PDUs */ @@ -174,7 +171,8 @@ struct iser_data_buf { /* fwd declarations */ struct iser_device; struct iscsi_iser_conn; -struct iscsi_iser_cmd_task; +struct iscsi_iser_task; +struct iscsi_endpoint; struct iser_mem_reg { u32 lkey; @@ -198,7 +196,7 @@ struct iser_regd_buf { #define MAX_REGD_BUF_VECTOR_LEN 2 struct iser_dto { - struct iscsi_iser_cmd_task *ctask; + struct iscsi_iser_task *task; struct iser_conn *ib_conn; int notify_enable; @@ -242,7 +240,9 @@ struct iser_device { struct iser_conn { struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ + struct iscsi_endpoint *ep; enum iser_ib_conn_state state; /* rdma connection state */ + atomic_t refcount; spinlock_t lock; /* used for state changes */ struct iser_device *device; /* device context */ struct rdma_cm_id *cma_id; /* CMA ID */ @@ -261,11 +261,9 @@ struct iser_conn { struct iscsi_iser_conn { struct iscsi_conn *iscsi_conn;/* ptr to iscsi conn */ struct iser_conn *ib_conn; /* iSER IB conn */ - - rwlock_t lock; }; -struct iscsi_iser_cmd_task { +struct iscsi_iser_task { struct iser_desc desc; struct iscsi_iser_conn *iser_conn; enum iser_task_status status; @@ -298,22 +296,26 @@ extern int iser_debug_level; /* allocate connection resources needed for rdma functionality */ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); -int iser_send_control(struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask); +int iser_send_control(struct iscsi_conn *conn, + struct iscsi_task *task); -int iser_send_command(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask); +int iser_send_command(struct iscsi_conn *conn, + struct iscsi_task *task); -int iser_send_data_out(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask, - struct iscsi_data *hdr); +int iser_send_data_out(struct iscsi_conn *conn, + struct iscsi_task *task, + struct iscsi_data *hdr); void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, char *rx_data, int rx_data_len); -int iser_conn_init(struct iser_conn **ib_conn); +void iser_conn_init(struct iser_conn *ib_conn); + +void iser_conn_get(struct iser_conn *ib_conn); + +void iser_conn_put(struct iser_conn *ib_conn); void iser_conn_terminate(struct iser_conn *ib_conn); @@ -322,9 +324,9 @@ void iser_rcv_completion(struct iser_desc *desc, void iser_snd_completion(struct iser_desc *desc); -void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *ctask); +void iser_task_rdma_init(struct iscsi_iser_task *task); -void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *ctask); +void iser_task_rdma_finalize(struct iscsi_iser_task *task); void iser_dto_buffs_release(struct iser_dto *dto); @@ -334,10 +336,10 @@ void iser_reg_single(struct iser_device *device, struct iser_regd_buf *regd_buf, enum dma_data_direction direction); -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *ctask, +int iser_reg_rdma_mem(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, @@ -357,10 +359,10 @@ int iser_post_send(struct iser_desc *tx_desc); int iser_conn_state_comp(struct iser_conn *ib_conn, enum iser_ib_conn_state comp); -int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, +int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir); -void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask); +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 08dc81c46f41..cdd283189047 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $ */ #include <linux/kernel.h> #include <linux/slab.h> @@ -66,46 +64,46 @@ static void iser_dto_add_regd_buff(struct iser_dto *dto, /* Register user buffer memory and initialize passive rdma * dto descriptor. Total data size is stored in - * iser_ctask->data[ISER_DIR_IN].data_len + * iser_task->data[ISER_DIR_IN].data_len */ -static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask, +static int iser_prepare_read_cmd(struct iscsi_task *task, unsigned int edtl) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_regd_buf *regd_buf; int err; - struct iser_hdr *hdr = &iser_ctask->desc.iser_header; - struct iser_data_buf *buf_in = &iser_ctask->data[ISER_DIR_IN]; + struct iser_hdr *hdr = &iser_task->desc.iser_header; + struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; - err = iser_dma_map_task_data(iser_ctask, + err = iser_dma_map_task_data(iser_task, buf_in, ISER_DIR_IN, DMA_FROM_DEVICE); if (err) return err; - if (edtl > iser_ctask->data[ISER_DIR_IN].data_len) { + if (edtl > iser_task->data[ISER_DIR_IN].data_len) { iser_err("Total data length: %ld, less than EDTL: " "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", - iser_ctask->data[ISER_DIR_IN].data_len, edtl, - ctask->itt, iser_ctask->iser_conn); + iser_task->data[ISER_DIR_IN].data_len, edtl, + task->itt, iser_task->iser_conn); return -EINVAL; } - err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_IN); + err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); return err; } - regd_buf = &iser_ctask->rdma_regd[ISER_DIR_IN]; + regd_buf = &iser_task->rdma_regd[ISER_DIR_IN]; hdr->flags |= ISER_RSV; hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey); hdr->read_va = cpu_to_be64(regd_buf->reg.va); iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n", - ctask->itt, regd_buf->reg.rkey, + task->itt, regd_buf->reg.rkey, (unsigned long long)regd_buf->reg.va); return 0; @@ -113,43 +111,43 @@ static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask, /* Register user buffer memory and initialize passive rdma * dto descriptor. Total data size is stored in - * ctask->data[ISER_DIR_OUT].data_len + * task->data[ISER_DIR_OUT].data_len */ static int -iser_prepare_write_cmd(struct iscsi_cmd_task *ctask, +iser_prepare_write_cmd(struct iscsi_task *task, unsigned int imm_sz, unsigned int unsol_sz, unsigned int edtl) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_regd_buf *regd_buf; int err; - struct iser_dto *send_dto = &iser_ctask->desc.dto; - struct iser_hdr *hdr = &iser_ctask->desc.iser_header; - struct iser_data_buf *buf_out = &iser_ctask->data[ISER_DIR_OUT]; + struct iser_dto *send_dto = &iser_task->desc.dto; + struct iser_hdr *hdr = &iser_task->desc.iser_header; + struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; - err = iser_dma_map_task_data(iser_ctask, + err = iser_dma_map_task_data(iser_task, buf_out, ISER_DIR_OUT, DMA_TO_DEVICE); if (err) return err; - if (edtl > iser_ctask->data[ISER_DIR_OUT].data_len) { + if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { iser_err("Total data length: %ld, less than EDTL: %d, " "in WRITE cmd BHS itt: %d, conn: 0x%p\n", - iser_ctask->data[ISER_DIR_OUT].data_len, - edtl, ctask->itt, ctask->conn); + iser_task->data[ISER_DIR_OUT].data_len, + edtl, task->itt, task->conn); return -EINVAL; } - err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_OUT); + err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); return err; } - regd_buf = &iser_ctask->rdma_regd[ISER_DIR_OUT]; + regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; if (unsol_sz < edtl) { hdr->flags |= ISER_WSV; @@ -158,13 +156,13 @@ iser_prepare_write_cmd(struct iscsi_cmd_task *ctask, iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " "VA:%#llX + unsol:%d\n", - ctask->itt, regd_buf->reg.rkey, + task->itt, regd_buf->reg.rkey, (unsigned long long)regd_buf->reg.va, unsol_sz); } if (imm_sz > 0) { iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", - ctask->itt, imm_sz); + task->itt, imm_sz); iser_dto_add_regd_buff(send_dto, regd_buf, 0, @@ -316,38 +314,38 @@ iser_check_xmit(struct iscsi_conn *conn, void *task) /** * iser_send_command - send command PDU */ -int iser_send_command(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) +int iser_send_command(struct iscsi_conn *conn, + struct iscsi_task *task) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_dto *send_dto = NULL; unsigned long edtl; int err = 0; struct iser_data_buf *data_buf; - struct iscsi_cmd *hdr = ctask->hdr; - struct scsi_cmnd *sc = ctask->sc; + struct iscsi_cmd *hdr = task->hdr; + struct scsi_cmnd *sc = task->sc; if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); return -EPERM; } - if (iser_check_xmit(conn, ctask)) + if (iser_check_xmit(conn, task)) return -ENOBUFS; edtl = ntohl(hdr->data_length); /* build the tx desc regd header and add it to the tx desc dto */ - iser_ctask->desc.type = ISCSI_TX_SCSI_COMMAND; - send_dto = &iser_ctask->desc.dto; - send_dto->ctask = iser_ctask; - iser_create_send_desc(iser_conn, &iser_ctask->desc); + iser_task->desc.type = ISCSI_TX_SCSI_COMMAND; + send_dto = &iser_task->desc.dto; + send_dto->task = iser_task; + iser_create_send_desc(iser_conn, &iser_task->desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) - data_buf = &iser_ctask->data[ISER_DIR_IN]; + data_buf = &iser_task->data[ISER_DIR_IN]; else - data_buf = &iser_ctask->data[ISER_DIR_OUT]; + data_buf = &iser_task->data[ISER_DIR_OUT]; if (scsi_sg_count(sc)) { /* using a scatter list */ data_buf->buf = scsi_sglist(sc); @@ -357,15 +355,15 @@ int iser_send_command(struct iscsi_conn *conn, data_buf->data_len = scsi_bufflen(sc); if (hdr->flags & ISCSI_FLAG_CMD_READ) { - err = iser_prepare_read_cmd(ctask, edtl); + err = iser_prepare_read_cmd(task, edtl); if (err) goto send_command_error; } if (hdr->flags & ISCSI_FLAG_CMD_WRITE) { - err = iser_prepare_write_cmd(ctask, - ctask->imm_count, - ctask->imm_count + - ctask->unsol_count, + err = iser_prepare_write_cmd(task, + task->imm_count, + task->imm_count + + task->unsol_count, edtl); if (err) goto send_command_error; @@ -380,27 +378,27 @@ int iser_send_command(struct iscsi_conn *conn, goto send_command_error; } - iser_ctask->status = ISER_TASK_STATUS_STARTED; + iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(&iser_ctask->desc); + err = iser_post_send(&iser_task->desc); if (!err) return 0; send_command_error: iser_dto_buffs_release(send_dto); - iser_err("conn %p failed ctask->itt %d err %d\n",conn, ctask->itt, err); + iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err); return err; } /** * iser_send_data_out - send data out PDU */ -int iser_send_data_out(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask, +int iser_send_data_out(struct iscsi_conn *conn, + struct iscsi_task *task, struct iscsi_data *hdr) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_desc *tx_desc = NULL; struct iser_dto *send_dto = NULL; unsigned long buf_offset; @@ -413,7 +411,7 @@ int iser_send_data_out(struct iscsi_conn *conn, return -EPERM; } - if (iser_check_xmit(conn, ctask)) + if (iser_check_xmit(conn, task)) return -ENOBUFS; itt = (__force uint32_t)hdr->itt; @@ -434,7 +432,7 @@ int iser_send_data_out(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ send_dto = &tx_desc->dto; - send_dto->ctask = iser_ctask; + send_dto->task = iser_task; iser_create_send_desc(iser_conn, tx_desc); iser_reg_single(iser_conn->ib_conn->device, @@ -442,15 +440,15 @@ int iser_send_data_out(struct iscsi_conn *conn, /* all data was registered for RDMA, we can use the lkey */ iser_dto_add_regd_buff(send_dto, - &iser_ctask->rdma_regd[ISER_DIR_OUT], + &iser_task->rdma_regd[ISER_DIR_OUT], buf_offset, data_seg_len); - if (buf_offset + data_seg_len > iser_ctask->data[ISER_DIR_OUT].data_len) { + if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { iser_err("Offset:%ld & DSL:%ld in Data-Out " "inconsistent with total len:%ld, itt:%d\n", buf_offset, data_seg_len, - iser_ctask->data[ISER_DIR_OUT].data_len, itt); + iser_task->data[ISER_DIR_OUT].data_len, itt); err = -EINVAL; goto send_data_out_error; } @@ -470,10 +468,11 @@ send_data_out_error: } int iser_send_control(struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask) + struct iscsi_task *task) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iser_desc *mdesc = mtask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_desc *mdesc = &iser_task->desc; struct iser_dto *send_dto = NULL; unsigned long data_seg_len; int err = 0; @@ -485,27 +484,27 @@ int iser_send_control(struct iscsi_conn *conn, return -EPERM; } - if (iser_check_xmit(conn,mtask)) + if (iser_check_xmit(conn, task)) return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; send_dto = &mdesc->dto; - send_dto->ctask = NULL; + send_dto->task = NULL; iser_create_send_desc(iser_conn, mdesc); device = iser_conn->ib_conn->device; iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE); - data_seg_len = ntoh24(mtask->hdr->dlength); + data_seg_len = ntoh24(task->hdr->dlength); if (data_seg_len > 0) { regd_buf = &mdesc->data_regd_buf; memset(regd_buf, 0, sizeof(struct iser_regd_buf)); regd_buf->device = device; - regd_buf->virt_addr = mtask->data; - regd_buf->data_size = mtask->data_count; + regd_buf->virt_addr = task->data; + regd_buf->data_size = task->data_count; iser_reg_single(device, regd_buf, DMA_TO_DEVICE); iser_dto_add_regd_buff(send_dto, regd_buf, @@ -535,15 +534,13 @@ send_control_error: void iser_rcv_completion(struct iser_desc *rx_desc, unsigned long dto_xfer_len) { - struct iser_dto *dto = &rx_desc->dto; + struct iser_dto *dto = &rx_desc->dto; struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; - struct iscsi_session *session = conn->iscsi_conn->session; - struct iscsi_cmd_task *ctask; - struct iscsi_iser_cmd_task *iser_ctask; + struct iscsi_task *task; + struct iscsi_iser_task *iser_task; struct iscsi_hdr *hdr; char *rx_data = NULL; int rx_data_len = 0; - unsigned int itt; unsigned char opcode; hdr = &rx_desc->iscsi_header; @@ -559,19 +556,24 @@ void iser_rcv_completion(struct iser_desc *rx_desc, opcode = hdr->opcode & ISCSI_OPCODE_MASK; if (opcode == ISCSI_OP_SCSI_CMD_RSP) { - itt = get_itt(hdr->itt); /* mask out cid and age bits */ - if (!(itt < session->cmds_max)) + spin_lock(&conn->iscsi_conn->session->lock); + task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt); + if (task) + __iscsi_get_task(task); + spin_unlock(&conn->iscsi_conn->session->lock); + + if (!task) iser_err("itt can't be matched to task!!! " - "conn %p opcode %d cmds_max %d itt %d\n", - conn->iscsi_conn,opcode,session->cmds_max,itt); - /* use the mapping given with the cmds array indexed by itt */ - ctask = (struct iscsi_cmd_task *)session->cmds[itt]; - iser_ctask = ctask->dd_data; - iser_dbg("itt %d ctask %p\n",itt,ctask); - iser_ctask->status = ISER_TASK_STATUS_COMPLETED; - iser_ctask_rdma_finalize(iser_ctask); + "conn %p opcode %d itt %d\n", + conn->iscsi_conn, opcode, hdr->itt); + else { + iser_task = task->dd_data; + iser_dbg("itt %d task %p\n",hdr->itt, task); + iser_task->status = ISER_TASK_STATUS_COMPLETED; + iser_task_rdma_finalize(iser_task); + iscsi_put_task(task); + } } - iser_dto_buffs_release(dto); iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); @@ -592,7 +594,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) struct iser_conn *ib_conn = dto->ib_conn; struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; - struct iscsi_mgmt_task *mtask; + struct iscsi_task *task; int resume_tx = 0; iser_dbg("Initiator, Data sent dto=0x%p\n", dto); @@ -615,36 +617,31 @@ void iser_snd_completion(struct iser_desc *tx_desc) if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ - mtask = (void *) ((long)(void *)tx_desc - - sizeof(struct iscsi_mgmt_task)); - if (mtask->hdr->itt == RESERVED_ITT) { - struct iscsi_session *session = conn->session; - - spin_lock(&conn->session->lock); - iscsi_free_mgmt_task(conn, mtask); - spin_unlock(&session->lock); - } + task = (void *) ((long)(void *)tx_desc - + sizeof(struct iscsi_task)); + if (task->hdr->itt == RESERVED_ITT) + iscsi_put_task(task); } } -void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask) +void iser_task_rdma_init(struct iscsi_iser_task *iser_task) { - iser_ctask->status = ISER_TASK_STATUS_INIT; + iser_task->status = ISER_TASK_STATUS_INIT; - iser_ctask->dir[ISER_DIR_IN] = 0; - iser_ctask->dir[ISER_DIR_OUT] = 0; + iser_task->dir[ISER_DIR_IN] = 0; + iser_task->dir[ISER_DIR_OUT] = 0; - iser_ctask->data[ISER_DIR_IN].data_len = 0; - iser_ctask->data[ISER_DIR_OUT].data_len = 0; + iser_task->data[ISER_DIR_IN].data_len = 0; + iser_task->data[ISER_DIR_OUT].data_len = 0; - memset(&iser_ctask->rdma_regd[ISER_DIR_IN], 0, + memset(&iser_task->rdma_regd[ISER_DIR_IN], 0, sizeof(struct iser_regd_buf)); - memset(&iser_ctask->rdma_regd[ISER_DIR_OUT], 0, + memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, sizeof(struct iser_regd_buf)); } -void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) +void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { int deferred; int is_rdma_aligned = 1; @@ -653,17 +650,17 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ - if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) { + if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN); + iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN); } - if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) { + if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT); + iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); } - if (iser_ctask->dir[ISER_DIR_IN]) { - regd = &iser_ctask->rdma_regd[ISER_DIR_IN]; + if (iser_task->dir[ISER_DIR_IN]) { + regd = &iser_task->rdma_regd[ISER_DIR_IN]; deferred = iser_regd_buff_release(regd); if (deferred) { iser_err("%d references remain for BUF-IN rdma reg\n", @@ -671,8 +668,8 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) } } - if (iser_ctask->dir[ISER_DIR_OUT]) { - regd = &iser_ctask->rdma_regd[ISER_DIR_OUT]; + if (iser_task->dir[ISER_DIR_OUT]) { + regd = &iser_task->rdma_regd[ISER_DIR_OUT]; deferred = iser_regd_buff_release(regd); if (deferred) { iser_err("%d references remain for BUF-OUT rdma reg\n", @@ -682,7 +679,7 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) /* if the data was unaligned, it was already unmapped and then copied */ if (is_rdma_aligned) - iser_dma_unmap_task_data(iser_ctask); + iser_dma_unmap_task_data(iser_task); } void iser_dto_buffs_release(struct iser_dto *dto) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index cac50c4dc159..b9453d068e9d 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_memory.c 6964 2006-05-07 11:11:43Z ogerlitz $ */ #include <linux/module.h> #include <linux/kernel.h> @@ -101,13 +99,13 @@ void iser_reg_single(struct iser_device *device, /** * iser_start_rdma_unaligned_sg */ -static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, +static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { int dma_nents; struct ib_device *dev; char *mem = NULL; - struct iser_data_buf *data = &iser_ctask->data[cmd_dir]; + struct iser_data_buf *data = &iser_task->data[cmd_dir]; unsigned long cmd_data_len = data->data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) @@ -140,37 +138,37 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, } } - sg_init_one(&iser_ctask->data_copy[cmd_dir].sg_single, mem, cmd_data_len); - iser_ctask->data_copy[cmd_dir].buf = - &iser_ctask->data_copy[cmd_dir].sg_single; - iser_ctask->data_copy[cmd_dir].size = 1; + sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len); + iser_task->data_copy[cmd_dir].buf = + &iser_task->data_copy[cmd_dir].sg_single; + iser_task->data_copy[cmd_dir].size = 1; - iser_ctask->data_copy[cmd_dir].copy_buf = mem; + iser_task->data_copy[cmd_dir].copy_buf = mem; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn->device->ib_device; dma_nents = ib_dma_map_sg(dev, - &iser_ctask->data_copy[cmd_dir].sg_single, + &iser_task->data_copy[cmd_dir].sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); BUG_ON(dma_nents == 0); - iser_ctask->data_copy[cmd_dir].dma_nents = dma_nents; + iser_task->data_copy[cmd_dir].dma_nents = dma_nents; return 0; } /** * iser_finalize_rdma_unaligned_sg */ -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { struct ib_device *dev; struct iser_data_buf *mem_copy; unsigned long cmd_data_len; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; - mem_copy = &iser_ctask->data_copy[cmd_dir]; + dev = iser_task->iser_conn->ib_conn->device->ib_device; + mem_copy = &iser_task->data_copy[cmd_dir]; ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? @@ -186,8 +184,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, /* copy back read RDMA to unaligned sg */ mem = mem_copy->copy_buf; - sgl = (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf; - sg_size = iser_ctask->data[ISER_DIR_IN].size; + sgl = (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf; + sg_size = iser_task->data[ISER_DIR_IN].size; p = mem; for_each_sg(sgl, sg, sg_size, i) { @@ -200,7 +198,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, } } - cmd_data_len = iser_ctask->data[cmd_dir].data_len; + cmd_data_len = iser_task->data[cmd_dir].data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) free_pages((unsigned long)mem_copy->copy_buf, @@ -378,15 +376,15 @@ static void iser_page_vec_build(struct iser_data_buf *data, } } -int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, - struct iser_data_buf *data, - enum iser_data_dir iser_dir, - enum dma_data_direction dma_dir) +int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir) { struct ib_device *dev; - iser_ctask->dir[iser_dir] = 1; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + iser_task->dir[iser_dir] = 1; + dev = iser_task->iser_conn->ib_conn->device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); if (data->dma_nents == 0) { @@ -396,20 +394,20 @@ int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, return 0; } -void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task) { struct ib_device *dev; struct iser_data_buf *data; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn->device->ib_device; - if (iser_ctask->dir[ISER_DIR_IN]) { - data = &iser_ctask->data[ISER_DIR_IN]; + if (iser_task->dir[ISER_DIR_IN]) { + data = &iser_task->data[ISER_DIR_IN]; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } - if (iser_ctask->dir[ISER_DIR_OUT]) { - data = &iser_ctask->data[ISER_DIR_OUT]; + if (iser_task->dir[ISER_DIR_OUT]) { + data = &iser_task->data[ISER_DIR_OUT]; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE); } } @@ -420,21 +418,21 @@ void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) * * returns 0 on success, errno code on failure */ -int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, +int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iscsi_conn *iscsi_conn = iser_ctask->iser_conn->iscsi_conn; - struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; + struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; + struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; - struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; + struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_regd_buf *regd_buf; int aligned_len; int err; int i; struct scatterlist *sg; - regd_buf = &iser_ctask->rdma_regd[cmd_dir]; + regd_buf = &iser_task->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { @@ -444,13 +442,13 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_ctask); + iser_dma_unmap_task_data(iser_task); /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ - if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0) + if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) return -ENOMEM; - mem = &iser_ctask->data_copy[cmd_dir]; + mem = &iser_task->data_copy[cmd_dir]; } /* if there a single dma entry, FMR is not needed */ @@ -474,8 +472,9 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); if (err) { iser_data_buf_dump(mem, ibdev); - iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, - ntoh24(iser_ctask->desc.iscsi_header.dlength)); + iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", + mem->dma_nents, + ntoh24(iser_task->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", ib_conn->page_vec->data_size, ib_conn->page_vec->length, ib_conn->page_vec->offset); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index d19cfe605ebb..3a917c1f796f 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $ */ #include <linux/kernel.h> #include <linux/module.h> @@ -325,7 +323,18 @@ static void iser_conn_release(struct iser_conn *ib_conn) iser_device_try_release(device); if (ib_conn->iser_conn) ib_conn->iser_conn->ib_conn = NULL; - kfree(ib_conn); + iscsi_destroy_endpoint(ib_conn->ep); +} + +void iser_conn_get(struct iser_conn *ib_conn) +{ + atomic_inc(&ib_conn->refcount); +} + +void iser_conn_put(struct iser_conn *ib_conn) +{ + if (atomic_dec_and_test(&ib_conn->refcount)) + iser_conn_release(ib_conn); } /** @@ -349,7 +358,7 @@ void iser_conn_terminate(struct iser_conn *ib_conn) wait_event_interruptible(ib_conn->wait, ib_conn->state == ISER_CONN_DOWN); - iser_conn_release(ib_conn); + iser_conn_put(ib_conn); } static void iser_connect_error(struct rdma_cm_id *cma_id) @@ -483,24 +492,15 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve return ret; } -int iser_conn_init(struct iser_conn **ibconn) +void iser_conn_init(struct iser_conn *ib_conn) { - struct iser_conn *ib_conn; - - ib_conn = kzalloc(sizeof *ib_conn, GFP_KERNEL); - if (!ib_conn) { - iser_err("can't alloc memory for struct iser_conn\n"); - return -ENOMEM; - } ib_conn->state = ISER_CONN_INIT; init_waitqueue_head(&ib_conn->wait); atomic_set(&ib_conn->post_recv_buf_count, 0); atomic_set(&ib_conn->post_send_buf_count, 0); + atomic_set(&ib_conn->refcount, 1); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); - - *ibconn = ib_conn; - return 0; } /** diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 435145709dd6..ed7c5f72cb8b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_srp.c 3932 2005-11-01 17:19:29Z roland $ */ #include <linux/module.h> @@ -49,8 +47,6 @@ #include <scsi/srp.h> #include <scsi/scsi_transport_srp.h> -#include <rdma/ib_cache.h> - #include "ib_srp.h" #define DRV_NAME "ib_srp" @@ -183,10 +179,10 @@ static int srp_init_qp(struct srp_target_port *target, if (!attr) return -ENOMEM; - ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, - target->srp_host->port, - be16_to_cpu(target->path.pkey), - &attr->pkey_index); + ret = ib_find_pkey(target->srp_host->srp_dev->dev, + target->srp_host->port, + be16_to_cpu(target->path.pkey), + &attr->pkey_index); if (ret) goto out; @@ -1883,8 +1879,7 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err; - ib_get_cached_gid(host->srp_dev->dev, host->port, 0, - &target->path.sgid); + ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid); shost_printk(KERN_DEBUG, target->scsi_host, PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 63d2ae724061..e185b907fc12 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_srp.h 3932 2005-11-01 17:19:29Z roland $ */ #ifndef IB_SRP_H |