From 5716af6e5234402b2017f41beb36c086201fae42 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg
Date: Wed, 1 Oct 2014 14:01:57 +0300
Subject: IB/iser: Rename ib_conn -> iser_conn

Two reasons why we chose to do this:

1. There is no point today in calling struct iser_conn by another name,
   ib_conn.

2. In the next patches we will restructure the iser control plane
   representation:
   - struct iser_conn: connection logical representation
   - struct ib_conn: connection RDMA layout representation

This patch does not change any functionality.

Signed-off-by: Ariel Nahum
Signed-off-by: Sagi Grimberg
Signed-off-by: Or Gerlitz
Signed-off-by: Roland Dreier
---
 drivers/infiniband/ulp/iser/iscsi_iser.c     | 125 ++++-----
 drivers/infiniband/ulp/iser/iscsi_iser.h     |  44 ++--
 drivers/infiniband/ulp/iser/iser_initiator.c | 197 +++++++-------
 drivers/infiniband/ulp/iser/iser_memory.c    |  54 ++--
 drivers/infiniband/ulp/iser/iser_verbs.c     | 375 ++++++++++++++-------------
 5 files changed, 403 insertions(+), 392 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 93ce62fe1594..1f3ad2b13ae2 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -147,8 +147,8 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc) { - struct iser_conn *ib_conn = task->conn->dd_data; - struct iser_device *device = ib_conn->device; + struct iser_conn *iser_conn = task->conn->dd_data; + struct iser_device *device = iser_conn->device; struct iscsi_iser_task *iser_task = task->dd_data; u64 dma_addr; @@ -162,7 +162,7 @@ int iser_initialize_task_headers(struct iscsi_task *task, tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; tx_desc->tx_sg[0].lkey = device->mr->lkey; - iser_task->ib_conn = ib_conn; + iser_task->iser_conn = iser_conn; return 0; } /** @@ -290,8 +290,8 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *tx_desc = &iser_task->desc; - struct iser_conn *ib_conn = task->conn->dd_data; - struct iser_device *device = ib_conn->device; + struct iser_conn *iser_conn = task->conn->dd_data; + struct iser_device *device = iser_conn->device; ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -344,7 +344,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, int is_leading) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; struct iscsi_endpoint *ep; int error; @@ -360,30 +360,30 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, (unsigned long long)transport_eph); return -EINVAL; } - ib_conn = ep->dd_data; + iser_conn = ep->dd_data; - mutex_lock(&ib_conn->state_mutex); - if (ib_conn->state != ISER_CONN_UP) { + mutex_lock(&iser_conn->state_mutex); + if (iser_conn->state != ISER_CONN_UP) { error = -EINVAL; iser_err("iser_conn %p state is %d, teardown started\n", - ib_conn, ib_conn->state); + iser_conn, iser_conn->state); goto out; } - error = iser_alloc_rx_descriptors(ib_conn, conn->session); + error = iser_alloc_rx_descriptors(iser_conn, conn->session); if (error) goto out; /* binds the iSER connection retrieved from the previously * connected ep_handle to the iSCSI layer connection.
exchanges * connection pointers */ - iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn); + iser_info("binding iscsi conn %p to iser_conn %p\n", conn, iser_conn); - conn->dd_data = ib_conn; - ib_conn->iscsi_conn = conn; + conn->dd_data = iser_conn; + iser_conn->iscsi_conn = conn; out: - mutex_unlock(&ib_conn->state_mutex); + mutex_unlock(&iser_conn->state_mutex); return error; } @@ -391,11 +391,11 @@ static int iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *iscsi_conn; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; iscsi_conn = cls_conn->dd_data; - ib_conn = iscsi_conn->dd_data; - reinit_completion(&ib_conn->stop_completion); + iser_conn = iscsi_conn->dd_data; + reinit_completion(&iser_conn->stop_completion); return iscsi_conn_start(cls_conn); } @@ -404,18 +404,18 @@ static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iser_conn *ib_conn = conn->dd_data; + struct iser_conn *iser_conn = conn->dd_data; - iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn); + iser_dbg("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn); iscsi_conn_stop(cls_conn, flag); /* * Userspace may have goofed up and not bound the connection or * might have only partially setup the connection. */ - if (ib_conn) { + if (iser_conn) { conn->dd_data = NULL; - complete(&ib_conn->stop_completion); + complete(&iser_conn->stop_completion); } } @@ -447,7 +447,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct Scsi_Host *shost; - struct iser_conn *ib_conn = NULL; + struct iser_conn *iser_conn = NULL; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) @@ -464,9 +464,9 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, * the leading conn's ep so this will be NULL; */ if (ep) { - ib_conn = ep->dd_data; - if (ib_conn->pi_support) { - u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap; + iser_conn = ep->dd_data; + if (iser_conn->pi_support) { + u32 sig_caps = iser_conn->device->dev_attr.sig_prot_cap; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); if (iser_pi_guard) @@ -476,8 +476,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } } - if (iscsi_host_add(shost, - ep ? ib_conn->device->ib_device->dma_device : NULL)) + if (iscsi_host_add(shost, ep ? 
+ iser_conn->device->ib_device->dma_device : NULL)) goto free_host; if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) { @@ -577,17 +577,17 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, enum iscsi_param param, char *buf) { - struct iser_conn *ib_conn = ep->dd_data; + struct iser_conn *iser_conn = ep->dd_data; int len; switch (param) { case ISCSI_PARAM_CONN_PORT: case ISCSI_PARAM_CONN_ADDRESS: - if (!ib_conn || !ib_conn->cma_id) + if (!iser_conn || !iser_conn->cma_id) return -ENOTCONN; return iscsi_conn_get_addr_param((struct sockaddr_storage *) - &ib_conn->cma_id->route.addr.dst_addr, + &iser_conn->cma_id->route.addr.dst_addr, param, buf); break; default: @@ -602,24 +602,24 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, int non_blocking) { int err; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; struct iscsi_endpoint *ep; ep = iscsi_create_endpoint(0); if (!ep) return ERR_PTR(-ENOMEM); - ib_conn = kzalloc(sizeof(*ib_conn), GFP_KERNEL); - if (!ib_conn) { + iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL); + if (!iser_conn) { err = -ENOMEM; goto failure; } - ep->dd_data = ib_conn; - ib_conn->ep = ep; - iser_conn_init(ib_conn); + ep->dd_data = iser_conn; + iser_conn->ep = ep; + iser_conn_init(iser_conn); - err = iser_connect(ib_conn, NULL, dst_addr, non_blocking); + err = iser_connect(iser_conn, NULL, dst_addr, non_blocking); if (err) goto failure; @@ -632,22 +632,22 @@ failure: static int iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; int rc; - ib_conn = ep->dd_data; - rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion, + iser_conn = ep->dd_data; + rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion, msecs_to_jiffies(timeout_ms)); /* if conn establishment failed, return error code to iscsi */ if (rc == 0) { - mutex_lock(&ib_conn->state_mutex); - if (ib_conn->state == ISER_CONN_TERMINATING || - ib_conn->state == ISER_CONN_DOWN) + mutex_lock(&iser_conn->state_mutex); + if (iser_conn->state == ISER_CONN_TERMINATING || + iser_conn->state == ISER_CONN_DOWN) rc = -1; - mutex_unlock(&ib_conn->state_mutex); + mutex_unlock(&iser_conn->state_mutex); } - iser_info("ib conn %p rc = %d\n", ib_conn, rc); + iser_info("ib conn %p rc = %d\n", iser_conn, rc); if (rc > 0) return 1; /* success, this is the equivalent of POLLOUT */ @@ -660,12 +660,14 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) static void iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; - ib_conn = ep->dd_data; - iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state); - mutex_lock(&ib_conn->state_mutex); - iser_conn_terminate(ib_conn); + iser_conn = ep->dd_data; + iser_info("ep %p iser conn %p state %d\n", + ep, iser_conn, iser_conn->state); + + mutex_lock(&iser_conn->state_mutex); + iser_conn_terminate(iser_conn); /* * if iser_conn and iscsi_conn are bound, we must wait for @@ -673,14 +675,14 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) * the iser resources. Otherwise we are safe to free resources * immediately. 
*/ - if (ib_conn->iscsi_conn) { - INIT_WORK(&ib_conn->release_work, iser_release_work); - queue_work(release_wq, &ib_conn->release_work); - mutex_unlock(&ib_conn->state_mutex); + if (iser_conn->iscsi_conn) { + INIT_WORK(&iser_conn->release_work, iser_release_work); + queue_work(release_wq, &iser_conn->release_work); + mutex_unlock(&iser_conn->state_mutex); } else { - ib_conn->state = ISER_CONN_DOWN; - mutex_unlock(&ib_conn->state_mutex); - iser_conn_release(ib_conn); + iser_conn->state = ISER_CONN_DOWN; + mutex_unlock(&iser_conn->state_mutex); + iser_conn_release(iser_conn); } iscsi_destroy_endpoint(ep); } @@ -843,7 +845,7 @@ register_transport_failure: static void __exit iser_exit(void) { - struct iser_conn *ib_conn, *n; + struct iser_conn *iser_conn, *n; int connlist_empty; iser_dbg("Removing iSER datamover...\n"); @@ -856,8 +858,9 @@ static void __exit iser_exit(void) if (!connlist_empty) { iser_err("Error cleanup stage completed but we still have iser " "connections, destroying them anyway.\n"); - list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) { - iser_conn_release(ib_conn); + list_for_each_entry_safe(iser_conn, n, &ig.connlist, + conn_list) { + iser_conn_release(iser_conn); } } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 9f0e0e34d6ca..ec34b8f7d385 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -179,7 +179,7 @@ struct iser_cm_hdr { /* Length of an object name string */ #define ISER_OBJECT_NAME_SIZE 64 -enum iser_ib_conn_state { +enum iser_conn_state { ISER_CONN_INIT, /* descriptor allocd, no conn */ ISER_CONN_PENDING, /* in the process of being established */ ISER_CONN_UP, /* up and running */ @@ -281,9 +281,9 @@ struct iser_device { int cq_active_qps[ISER_MAX_CQ]; int cqs_used; struct iser_cq_desc *cq_desc; - int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn, + int (*iser_alloc_rdma_reg_res)(struct iser_conn *iser_conn, unsigned cmds_max); - void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn); + void (*iser_free_rdma_reg_res)(struct iser_conn *iser_conn); int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task, @@ -320,7 +320,7 @@ struct fast_reg_descriptor { struct iser_conn { struct iscsi_conn *iscsi_conn; struct iscsi_endpoint *ep; - enum iser_ib_conn_state state; /* rdma connection state */ + enum iser_conn_state state; /* rdma connection state */ atomic_t refcount; spinlock_t lock; /* used for state changes */ struct iser_device *device; /* device context */ @@ -363,7 +363,7 @@ struct iser_conn { struct iscsi_iser_task { struct iser_tx_desc desc; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; enum iser_task_status status; struct scsi_cmnd *sc; int command_sent; /* set if command sent */ @@ -419,25 +419,26 @@ void iscsi_iser_recv(struct iscsi_conn *conn, char *rx_data, int rx_data_len); -void iser_conn_init(struct iser_conn *ib_conn); +void iser_conn_init(struct iser_conn *iser_conn); -void iser_conn_release(struct iser_conn *ib_conn); +void iser_conn_release(struct iser_conn *iser_conn); -void iser_conn_terminate(struct iser_conn *ib_conn); +void iser_conn_terminate(struct iser_conn *iser_conn); void iser_release_work(struct work_struct *work); void iser_rcv_completion(struct iser_rx_desc *desc, unsigned long dto_xfer_len, - struct iser_conn *ib_conn); + struct iser_conn *iser_conn); -void iser_snd_completion(struct iser_tx_desc 
*desc, struct iser_conn *ib_conn); +void iser_snd_completion(struct iser_tx_desc *desc, + struct iser_conn *iser_conn); void iser_task_rdma_init(struct iscsi_iser_task *task); void iser_task_rdma_finalize(struct iscsi_iser_task *task); -void iser_free_rx_descriptors(struct iser_conn *ib_conn); +void iser_free_rx_descriptors(struct iser_conn *iser_conn); void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct iser_data_buf *mem, @@ -449,12 +450,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_connect(struct iser_conn *ib_conn, +int iser_connect(struct iser_conn *iser_conn, struct sockaddr *src_addr, struct sockaddr *dst_addr, int non_blocking); -int iser_reg_page_vec(struct iser_conn *ib_conn, +int iser_reg_page_vec(struct iser_conn *iser_conn, struct iser_page_vec *page_vec, struct iser_mem_reg *mem_reg); @@ -463,9 +464,9 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); -int iser_post_recvl(struct iser_conn *ib_conn); -int iser_post_recvm(struct iser_conn *ib_conn, int count); -int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc); +int iser_post_recvl(struct iser_conn *iser_conn); +int iser_post_recvm(struct iser_conn *iser_conn, int count); +int iser_post_send(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc); int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data, @@ -476,11 +477,12 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); -int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); -int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_fmr_pool(struct iser_conn *ib_conn); -int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_fastreg_pool(struct iser_conn *ib_conn); +int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, + struct iscsi_session *session); +int iser_create_fmr_pool(struct iser_conn *iser_conn, unsigned cmds_max); +void iser_free_fmr_pool(struct iser_conn *iser_conn); +int iser_create_fastreg_pool(struct iser_conn *iser_conn, unsigned cmds_max); +void iser_free_fastreg_pool(struct iser_conn *iser_conn); u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 8d44a4060634..1f53ccb31534 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -49,7 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->ib_conn->device; + struct iser_device *device = iser_task->iser_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -103,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->ib_conn->device; + struct iser_device *device = iser_task->iser_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = 
&iser_task->desc.iser_header; @@ -160,10 +160,10 @@ iser_prepare_write_cmd(struct iscsi_task *task, } /* creates a new tx descriptor and adds header regd buffer */ -static void iser_create_send_desc(struct iser_conn *ib_conn, +static void iser_create_send_desc(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc) { - struct iser_device *device = ib_conn->device; + struct iser_device *device = iser_conn->device; ib_dma_sync_single_for_cpu(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -179,103 +179,106 @@ static void iser_create_send_desc(struct iser_conn *ib_conn, } } -static void iser_free_login_buf(struct iser_conn *ib_conn) +static void iser_free_login_buf(struct iser_conn *iser_conn) { - if (!ib_conn->login_buf) + if (!iser_conn->login_buf) return; - if (ib_conn->login_req_dma) - ib_dma_unmap_single(ib_conn->device->ib_device, - ib_conn->login_req_dma, + if (iser_conn->login_req_dma) + ib_dma_unmap_single(iser_conn->device->ib_device, + iser_conn->login_req_dma, ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - if (ib_conn->login_resp_dma) - ib_dma_unmap_single(ib_conn->device->ib_device, - ib_conn->login_resp_dma, + if (iser_conn->login_resp_dma) + ib_dma_unmap_single(iser_conn->device->ib_device, + iser_conn->login_resp_dma, ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->login_buf); + kfree(iser_conn->login_buf); /* make sure we never redo any unmapping */ - ib_conn->login_req_dma = 0; - ib_conn->login_resp_dma = 0; - ib_conn->login_buf = NULL; + iser_conn->login_req_dma = 0; + iser_conn->login_resp_dma = 0; + iser_conn->login_buf = NULL; } -static int iser_alloc_login_buf(struct iser_conn *ib_conn) +static int iser_alloc_login_buf(struct iser_conn *iser_conn) { struct iser_device *device; int req_err, resp_err; - BUG_ON(ib_conn->device == NULL); + BUG_ON(iser_conn->device == NULL); - device = ib_conn->device; + device = iser_conn->device; - ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + + iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE, GFP_KERNEL); - if (!ib_conn->login_buf) + if (!iser_conn->login_buf) goto out_err; - ib_conn->login_req_buf = ib_conn->login_buf; - ib_conn->login_resp_buf = ib_conn->login_buf + + iser_conn->login_req_buf = iser_conn->login_buf; + iser_conn->login_resp_buf = iser_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; - ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_req_buf, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + iser_conn->login_req_dma = ib_dma_map_single(device->ib_device, + iser_conn->login_req_buf, + ISCSI_DEF_MAX_RECV_SEG_LEN, + DMA_TO_DEVICE); - ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_resp_buf, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device, + iser_conn->login_resp_buf, + ISER_RX_LOGIN_SIZE, + DMA_FROM_DEVICE); req_err = ib_dma_mapping_error(device->ib_device, - ib_conn->login_req_dma); + iser_conn->login_req_dma); resp_err = ib_dma_mapping_error(device->ib_device, - ib_conn->login_resp_dma); + iser_conn->login_resp_dma); if (req_err || resp_err) { if (req_err) - ib_conn->login_req_dma = 0; + iser_conn->login_req_dma = 0; if (resp_err) - ib_conn->login_resp_dma = 0; + iser_conn->login_resp_dma = 0; goto free_login_buf; } return 0; free_login_buf: - iser_free_login_buf(ib_conn); + iser_free_login_buf(iser_conn); out_err: iser_err("unable to alloc or map login buf\n"); return -ENOMEM; } -int 
iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session) +int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, + struct iscsi_session *session) { int i, j; u64 dma_addr; struct iser_rx_desc *rx_desc; struct ib_sge *rx_sg; - struct iser_device *device = ib_conn->device; + struct iser_device *device = iser_conn->device; - ib_conn->qp_max_recv_dtos = session->cmds_max; - ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ - ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2; + iser_conn->qp_max_recv_dtos = session->cmds_max; + iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ + iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2; - if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max)) + if (device->iser_alloc_rdma_reg_res(iser_conn, session->scsi_cmds_max)) goto create_rdma_reg_res_failed; - if (iser_alloc_login_buf(ib_conn)) + if (iser_alloc_login_buf(iser_conn)) goto alloc_login_buf_fail; - ib_conn->rx_descs = kmalloc(session->cmds_max * + iser_conn->rx_descs = kmalloc(session->cmds_max * sizeof(struct iser_rx_desc), GFP_KERNEL); - if (!ib_conn->rx_descs) + if (!iser_conn->rx_descs) goto rx_desc_alloc_fail; - rx_desc = ib_conn->rx_descs; + rx_desc = iser_conn->rx_descs; - for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) { + for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) { dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(device->ib_device, dma_addr)) @@ -289,52 +292,52 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *s rx_sg->lkey = device->mr->lkey; } - ib_conn->rx_desc_head = 0; + iser_conn->rx_desc_head = 0; return 0; rx_desc_dma_map_failed: - rx_desc = ib_conn->rx_descs; + rx_desc = iser_conn->rx_descs; for (j = 0; j < i; j++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->rx_descs); - ib_conn->rx_descs = NULL; + kfree(iser_conn->rx_descs); + iser_conn->rx_descs = NULL; rx_desc_alloc_fail: - iser_free_login_buf(ib_conn); + iser_free_login_buf(iser_conn); alloc_login_buf_fail: - device->iser_free_rdma_reg_res(ib_conn); + device->iser_free_rdma_reg_res(iser_conn); create_rdma_reg_res_failed: iser_err("failed allocating rx descriptors / data buffers\n"); return -ENOMEM; } -void iser_free_rx_descriptors(struct iser_conn *ib_conn) +void iser_free_rx_descriptors(struct iser_conn *iser_conn) { int i; struct iser_rx_desc *rx_desc; - struct iser_device *device = ib_conn->device; + struct iser_device *device = iser_conn->device; - if (!ib_conn->rx_descs) + if (!iser_conn->rx_descs) goto free_login_buf; if (device->iser_free_rdma_reg_res) - device->iser_free_rdma_reg_res(ib_conn); + device->iser_free_rdma_reg_res(iser_conn); - rx_desc = ib_conn->rx_descs; - for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) + rx_desc = iser_conn->rx_descs; + for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->rx_descs); + kfree(iser_conn->rx_descs); /* make sure we never redo any unmapping */ - ib_conn->rx_descs = NULL; + iser_conn->rx_descs = NULL; free_login_buf: - iser_free_login_buf(ib_conn); + iser_free_login_buf(iser_conn); } static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) { - struct iser_conn *ib_conn = conn->dd_data; + 
struct iser_conn *iser_conn = conn->dd_data; struct iscsi_session *session = conn->session; iser_dbg("req op %x flags %x\n", req->opcode, req->flags); @@ -347,18 +350,18 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) * response) and no posted send buffers left - they must have been * consumed during previous login phases. */ - WARN_ON(ib_conn->post_recv_buf_count != 1); - WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0); + WARN_ON(iser_conn->post_recv_buf_count != 1); + WARN_ON(atomic_read(&iser_conn->post_send_buf_count) != 0); if (session->discovery_sess) { iser_info("Discovery session, re-using login RX buffer\n"); return 0; } else iser_info("Normal session, posting batch of RX %d buffers\n", - ib_conn->min_posted_rx); + iser_conn->min_posted_rx); /* Initial post receive buffers */ - if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx)) + if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx)) return -ENOMEM; return 0; @@ -370,7 +373,7 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iser_conn *ib_conn = conn->dd_data; + struct iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; unsigned long edtl; int err; @@ -383,7 +386,7 @@ int iser_send_command(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ tx_desc->type = ISCSI_TX_SCSI_COMMAND; - iser_create_send_desc(ib_conn, tx_desc); + iser_create_send_desc(iser_conn, tx_desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) { data_buf = &iser_task->data[ISER_DIR_IN]; @@ -423,7 +426,7 @@ int iser_send_command(struct iscsi_conn *conn, iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(ib_conn, tx_desc); + err = iser_post_send(iser_conn, tx_desc); if (!err) return 0; @@ -439,7 +442,7 @@ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task, struct iscsi_data *hdr) { - struct iser_conn *ib_conn = conn->dd_data; + struct iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *tx_desc = NULL; struct iser_regd_buf *regd_buf; @@ -488,7 +491,7 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(ib_conn, tx_desc); + err = iser_post_send(iser_conn, tx_desc); if (!err) return 0; @@ -501,7 +504,7 @@ send_data_out_error: int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iser_conn *ib_conn = conn->dd_data; + struct iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *mdesc = &iser_task->desc; unsigned long data_seg_len; @@ -510,9 +513,9 @@ int iser_send_control(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; - iser_create_send_desc(ib_conn, mdesc); + iser_create_send_desc(iser_conn, mdesc); - device = ib_conn->device; + device = iser_conn->device; data_seg_len = ntoh24(task->hdr->dlength); @@ -524,16 +527,16 @@ int iser_send_control(struct iscsi_conn *conn, } ib_dma_sync_single_for_cpu(device->ib_device, - ib_conn->login_req_dma, task->data_count, + iser_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - memcpy(ib_conn->login_req_buf, task->data, task->data_count); + memcpy(iser_conn->login_req_buf, task->data, task->data_count); ib_dma_sync_single_for_device(device->ib_device, - ib_conn->login_req_dma, task->data_count, + 
iser_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - tx_dsg->addr = ib_conn->login_req_dma; + tx_dsg->addr = iser_conn->login_req_dma; tx_dsg->length = task->data_count; tx_dsg->lkey = device->mr->lkey; mdesc->num_sge = 2; @@ -542,7 +545,7 @@ int iser_send_control(struct iscsi_conn *conn, if (task == conn->login_task) { iser_dbg("op %x dsl %lx, posting login rx buffer\n", task->hdr->opcode, data_seg_len); - err = iser_post_recvl(ib_conn); + err = iser_post_recvl(iser_conn); if (err) goto send_control_error; err = iser_post_rx_bufs(conn, task->hdr); @@ -550,7 +553,7 @@ int iser_send_control(struct iscsi_conn *conn, goto send_control_error; } - err = iser_post_send(ib_conn, mdesc); + err = iser_post_send(iser_conn, mdesc); if (!err) return 0; @@ -564,59 +567,59 @@ send_control_error: */ void iser_rcv_completion(struct iser_rx_desc *rx_desc, unsigned long rx_xfer_len, - struct iser_conn *ib_conn) + struct iser_conn *iser_conn) { struct iscsi_hdr *hdr; u64 rx_dma; int rx_buflen, outstanding, count, err; /* differentiate between login to all other PDUs */ - if ((char *)rx_desc == ib_conn->login_resp_buf) { - rx_dma = ib_conn->login_resp_dma; + if ((char *)rx_desc == iser_conn->login_resp_buf) { + rx_dma = iser_conn->login_resp_dma; rx_buflen = ISER_RX_LOGIN_SIZE; } else { rx_dma = rx_desc->dma_addr; rx_buflen = ISER_RX_PAYLOAD_SIZE; } - ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(iser_conn->device->ib_device, rx_dma, + rx_buflen, DMA_FROM_DEVICE); hdr = &rx_desc->iscsi_header; iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); - iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data, + iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN); - ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, + ib_dma_sync_single_for_device(iser_conn->device->ib_device, rx_dma, rx_buflen, DMA_FROM_DEVICE); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task eliminates the need to worry on tasks which are completed in * * parallel to the execution of iser_conn_term. 
So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ - ib_conn->post_recv_buf_count--; + iser_conn->post_recv_buf_count--; - if (rx_dma == ib_conn->login_resp_dma) + if (rx_dma == iser_conn->login_resp_dma) return; - outstanding = ib_conn->post_recv_buf_count; - if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) { - count = min(ib_conn->qp_max_recv_dtos - outstanding, - ib_conn->min_posted_rx); - err = iser_post_recvm(ib_conn, count); + outstanding = iser_conn->post_recv_buf_count; + if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { + count = min(iser_conn->qp_max_recv_dtos - outstanding, + iser_conn->min_posted_rx); + err = iser_post_recvm(iser_conn, count); if (err) iser_err("posting %d rx bufs err %d\n", count, err); } } void iser_snd_completion(struct iser_tx_desc *tx_desc, - struct iser_conn *ib_conn) + struct iser_conn *iser_conn) { struct iscsi_task *task; - struct iser_device *device = ib_conn->device; + struct iser_device *device = iser_conn->device; if (tx_desc->type == ISCSI_TX_DATAOUT) { ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, @@ -625,7 +628,7 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc, tx_desc = NULL; } - atomic_dec(&ib_conn->post_send_buf_count); + atomic_dec(&iser_conn->post_send_buf_count); if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ @@ -658,7 +661,7 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - struct iser_device *device = iser_task->ib_conn->device; + struct iser_device *device = iser_task->iser_conn->device; int is_rdma_data_aligned = 1; int is_rdma_prot_aligned = 1; int prot_count = scsi_prot_sg_count(iser_task->sc); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 47acd3ad3a17..ba09fbbe765e 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -49,7 +49,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct iser_data_buf *data_copy, enum iser_data_dir cmd_dir) { - struct ib_device *dev = iser_task->ib_conn->device->ib_device; + struct ib_device *dev = iser_task->iser_conn->device->ib_device; struct scatterlist *sgl = (struct scatterlist *)data->buf; struct scatterlist *sg; char *mem = NULL; @@ -116,7 +116,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct ib_device *dev; unsigned long cmd_data_len; - dev = iser_task->ib_conn->device->ib_device; + dev = iser_task->iser_conn->device->ib_device; ib_dma_unmap_sg(dev, &data_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? 
@@ -322,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct ib_device *dev; iser_task->dir[iser_dir] = 1; - dev = iser_task->ib_conn->device->ib_device; + dev = iser_task->iser_conn->device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); if (data->dma_nents == 0) { @@ -337,7 +337,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, { struct ib_device *dev; - dev = iser_task->ib_conn->device->ib_device; + dev = iser_task->iser_conn->device->ib_device; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } @@ -348,7 +348,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, int aligned_len) { - struct iscsi_conn *iscsi_conn = iser_task->ib_conn->iscsi_conn; + struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; iscsi_conn->fmr_unalign_cnt++; iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", @@ -377,8 +377,8 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_conn *ib_conn = iser_task->ib_conn; - struct iser_device *device = ib_conn->device; + struct iser_conn *iser_conn = iser_task->iser_conn; + struct iser_device *device = iser_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_regd_buf *regd_buf; @@ -418,8 +418,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, (unsigned long)regd_buf->reg.va, (unsigned long)regd_buf->reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev); - err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec, + iser_page_vec_build(mem, iser_conn->fmr.page_vec, ibdev); + err = iser_reg_page_vec(iser_conn, iser_conn->fmr.page_vec, ®d_buf->reg); if (err && err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); @@ -427,12 +427,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, mem->dma_nents, ntoh24(iser_task->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", - ib_conn->fmr.page_vec->data_size, - ib_conn->fmr.page_vec->length, - ib_conn->fmr.page_vec->offset); - for (i = 0; i < ib_conn->fmr.page_vec->length; i++) + iser_conn->fmr.page_vec->data_size, + iser_conn->fmr.page_vec->length, + iser_conn->fmr.page_vec->offset); + for (i = 0; i < iser_conn->fmr.page_vec->length; i++) iser_err("page_vec[%d] = 0x%llx\n", i, - (unsigned long long) ib_conn->fmr.page_vec->pages[i]); + (unsigned long long)iser_conn->fmr.page_vec->pages[i]); } if (err) return err; @@ -533,7 +533,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, struct fast_reg_descriptor *desc, struct ib_sge *data_sge, struct ib_sge *prot_sge, struct ib_sge *sig_sge) { - struct iser_conn *ib_conn = iser_task->ib_conn; + struct iser_conn *iser_conn = iser_task->iser_conn; struct iser_pi_context *pi_ctx = desc->pi_ctx; struct ib_send_wr sig_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; @@ -579,7 +579,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, else wr->next = &sig_wr; - ret = ib_post_send(ib_conn->qp, wr, &bad_wr); + ret = ib_post_send(iser_conn->qp, wr, &bad_wr); if (ret) { iser_err("reg_sig_mr failed, ret:%d\n", ret); goto err; @@ -609,8 +609,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct ib_sge *sge) { struct fast_reg_descriptor *desc = regd_buf->reg.mem_h; - struct iser_conn 
*ib_conn = iser_task->ib_conn; - struct iser_device *device = ib_conn->device; + struct iser_conn *iser_conn = iser_task->iser_conn; + struct iser_device *device = iser_conn->device; struct ib_device *ibdev = device->ib_device; struct ib_mr *mr; struct ib_fast_reg_page_list *frpl; @@ -677,7 +677,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, else wr->next = &fastreg_wr; - ret = ib_post_send(ib_conn->qp, wr, &bad_wr); + ret = ib_post_send(iser_conn->qp, wr, &bad_wr); if (ret) { iser_err("fast registration failed, ret:%d\n", ret); return ret; @@ -700,8 +700,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_conn *ib_conn = iser_task->ib_conn; - struct iser_device *device = ib_conn->device; + struct iser_conn *iser_conn = iser_task->iser_conn; + struct iser_device *device = iser_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir]; @@ -724,11 +724,11 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, if (mem->dma_nents != 1 || scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { - spin_lock_irqsave(&ib_conn->lock, flags); - desc = list_first_entry(&ib_conn->fastreg.pool, + spin_lock_irqsave(&iser_conn->lock, flags); + desc = list_first_entry(&iser_conn->fastreg.pool, struct fast_reg_descriptor, list); list_del(&desc->list); - spin_unlock_irqrestore(&ib_conn->lock, flags); + spin_unlock_irqrestore(&iser_conn->lock, flags); regd_buf->reg.mem_h = desc; } @@ -791,9 +791,9 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, return 0; err_reg: if (desc) { - spin_lock_irqsave(&ib_conn->lock, flags); - list_add_tail(&desc->list, &ib_conn->fastreg.pool); - spin_unlock_irqrestore(&ib_conn->lock, flags); + spin_lock_irqsave(&iser_conn->lock, flags); + list_add_tail(&desc->list, &iser_conn->fastreg.pool); + spin_unlock_irqrestore(&iser_conn->lock, flags); } return err; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 3bfec4bbda52..778c166916fe 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -213,19 +213,19 @@ static void iser_free_device_ib_res(struct iser_device *device) * * returns 0 on success, or errno code on failure */ -int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +int iser_create_fmr_pool(struct iser_conn *iser_conn, unsigned cmds_max) { - struct iser_device *device = ib_conn->device; + struct iser_device *device = iser_conn->device; struct ib_fmr_pool_param params; int ret = -ENOMEM; - ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) + + iser_conn->fmr.page_vec = kmalloc(sizeof(*iser_conn->fmr.page_vec) + (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), GFP_KERNEL); - if (!ib_conn->fmr.page_vec) + if (!iser_conn->fmr.page_vec) return ret; - ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1); + iser_conn->fmr.page_vec->pages = (u64 *)(iser_conn->fmr.page_vec + 1); params.page_shift = SHIFT_4K; /* when the first/last SG element are not start/end * @@ -241,16 +241,16 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); - ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); - if (!IS_ERR(ib_conn->fmr.pool)) + iser_conn->fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); + 
if (!IS_ERR(iser_conn->fmr.pool)) return 0; /* no FMR => no need for page_vec */ - kfree(ib_conn->fmr.page_vec); - ib_conn->fmr.page_vec = NULL; + kfree(iser_conn->fmr.page_vec); + iser_conn->fmr.page_vec = NULL; - ret = PTR_ERR(ib_conn->fmr.pool); - ib_conn->fmr.pool = NULL; + ret = PTR_ERR(iser_conn->fmr.pool); + iser_conn->fmr.pool = NULL; if (ret != -ENOSYS) { iser_err("FMR allocation failed, err %d\n", ret); return ret; @@ -263,18 +263,18 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) /** * iser_free_fmr_pool - releases the FMR pool and page vec */ -void iser_free_fmr_pool(struct iser_conn *ib_conn) +void iser_free_fmr_pool(struct iser_conn *iser_conn) { iser_info("freeing conn %p fmr pool %p\n", - ib_conn, ib_conn->fmr.pool); + iser_conn, iser_conn->fmr.pool); - if (ib_conn->fmr.pool != NULL) - ib_destroy_fmr_pool(ib_conn->fmr.pool); + if (iser_conn->fmr.pool != NULL) + ib_destroy_fmr_pool(iser_conn->fmr.pool); - ib_conn->fmr.pool = NULL; + iser_conn->fmr.pool = NULL; - kfree(ib_conn->fmr.page_vec); - ib_conn->fmr.page_vec = NULL; + kfree(iser_conn->fmr.page_vec); + iser_conn->fmr.page_vec = NULL; } static int @@ -367,14 +367,14 @@ fast_reg_mr_failure: * for fast registration work requests. * returns 0 on success, or errno code on failure */ -int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) +int iser_create_fastreg_pool(struct iser_conn *iser_conn, unsigned cmds_max) { - struct iser_device *device = ib_conn->device; + struct iser_device *device = iser_conn->device; struct fast_reg_descriptor *desc; int i, ret; - INIT_LIST_HEAD(&ib_conn->fastreg.pool); - ib_conn->fastreg.pool_size = 0; + INIT_LIST_HEAD(&iser_conn->fastreg.pool); + iser_conn->fastreg.pool_size = 0; for (i = 0; i < cmds_max; i++) { desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) { @@ -384,7 +384,7 @@ int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) } ret = iser_create_fastreg_desc(device->ib_device, device->pd, - ib_conn->pi_support, desc); + iser_conn->pi_support, desc); if (ret) { iser_err("Failed to create fastreg descriptor err=%d\n", ret); @@ -392,31 +392,31 @@ int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) goto err; } - list_add_tail(&desc->list, &ib_conn->fastreg.pool); - ib_conn->fastreg.pool_size++; + list_add_tail(&desc->list, &iser_conn->fastreg.pool); + iser_conn->fastreg.pool_size++; } return 0; err: - iser_free_fastreg_pool(ib_conn); + iser_free_fastreg_pool(iser_conn); return ret; } /** * iser_free_fastreg_pool - releases the pool of fast_reg descriptors */ -void iser_free_fastreg_pool(struct iser_conn *ib_conn) +void iser_free_fastreg_pool(struct iser_conn *iser_conn) { struct fast_reg_descriptor *desc, *tmp; int i = 0; - if (list_empty(&ib_conn->fastreg.pool)) + if (list_empty(&iser_conn->fastreg.pool)) return; - iser_info("freeing conn %p fr pool\n", ib_conn); + iser_info("freeing conn %p fr pool\n", iser_conn); - list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) { + list_for_each_entry_safe(desc, tmp, &iser_conn->fastreg.pool, list) { list_del(&desc->list); ib_free_fast_reg_page_list(desc->data_frpl); ib_dereg_mr(desc->data_mr); @@ -430,9 +430,9 @@ void iser_free_fastreg_pool(struct iser_conn *ib_conn) ++i; } - if (i < ib_conn->fastreg.pool_size) + if (i < iser_conn->fastreg.pool_size) iser_warn("pool still has %d regions registered\n", - ib_conn->fastreg.pool_size - i); + iser_conn->fastreg.pool_size - i); } /** @@ -440,16 +440,16 @@ void iser_free_fastreg_pool(struct 
iser_conn *ib_conn) * * returns 0 on success, -1 on failure */ -static int iser_create_ib_conn_res(struct iser_conn *ib_conn) +static int iser_create_ib_conn_res(struct iser_conn *iser_conn) { struct iser_device *device; struct ib_qp_init_attr init_attr; int ret = -ENOMEM; int index, min_index = 0; - BUG_ON(ib_conn->device == NULL); + BUG_ON(iser_conn->device == NULL); - device = ib_conn->device; + device = iser_conn->device; memset(&init_attr, 0, sizeof init_attr); @@ -461,10 +461,10 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) min_index = index; device->cq_active_qps[min_index]++; mutex_unlock(&ig.connlist_mutex); - iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); + iser_info("cq index %d used for iser_conn %p\n", min_index, iser_conn); init_attr.event_handler = iser_qp_event_callback; - init_attr.qp_context = (void *)ib_conn; + init_attr.qp_context = (void *)iser_conn; init_attr.send_cq = device->tx_cq[min_index]; init_attr.recv_cq = device->rx_cq[min_index]; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; @@ -472,21 +472,21 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) init_attr.cap.max_recv_sge = 1; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; - if (ib_conn->pi_support) { + if (iser_conn->pi_support) { init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS; init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; } else { init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; } - ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); + ret = rdma_create_qp(iser_conn->cma_id, device->pd, &init_attr); if (ret) goto out_err; - ib_conn->qp = ib_conn->cma_id->qp; + iser_conn->qp = iser_conn->cma_id->qp; iser_info("setting conn %p cma_id %p qp %p\n", - ib_conn, ib_conn->cma_id, - ib_conn->cma_id->qp); + iser_conn, iser_conn->cma_id, + iser_conn->cma_id->qp); return ret; out_err: @@ -497,25 +497,25 @@ out_err: /** * releases the QP object */ -static void iser_free_ib_conn_res(struct iser_conn *ib_conn) +static void iser_free_ib_conn_res(struct iser_conn *iser_conn) { int cq_index; - BUG_ON(ib_conn == NULL); + BUG_ON(iser_conn == NULL); iser_info("freeing conn %p cma_id %p qp %p\n", - ib_conn, ib_conn->cma_id, - ib_conn->qp); + iser_conn, iser_conn->cma_id, + iser_conn->qp); /* qp is created only once both addr & route are resolved */ - if (ib_conn->qp != NULL) { - cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index; - ib_conn->device->cq_active_qps[cq_index]--; + if (iser_conn->qp != NULL) { + cq_index = ((struct iser_cq_desc *)iser_conn->qp->recv_cq->cq_context)->cq_index; + iser_conn->device->cq_active_qps[cq_index]--; - rdma_destroy_qp(ib_conn->cma_id); + rdma_destroy_qp(iser_conn->cma_id); } - ib_conn->qp = NULL; + iser_conn->qp = NULL; } /** @@ -572,75 +572,77 @@ static void iser_device_try_release(struct iser_device *device) /** * Called with state mutex held **/ -static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, - enum iser_ib_conn_state comp, - enum iser_ib_conn_state exch) +static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, + enum iser_conn_state comp, + enum iser_conn_state exch) { int ret; - if ((ret = (ib_conn->state == comp))) - ib_conn->state = exch; + ret = (iser_conn->state == comp); + if (ret) + iser_conn->state = exch; + return ret; } void iser_release_work(struct work_struct *work) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; int rc; - ib_conn = container_of(work, struct iser_conn, release_work); + iser_conn = 
container_of(work, struct iser_conn, release_work); /* wait for .conn_stop callback */ - rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ); + rc = wait_for_completion_timeout(&iser_conn->stop_completion, 30 * HZ); WARN_ON(rc == 0); /* wait for the qp`s post send and post receive buffers to empty */ - rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ); + rc = wait_for_completion_timeout(&iser_conn->flush_completion, 30 * HZ); WARN_ON(rc == 0); - ib_conn->state = ISER_CONN_DOWN; + iser_conn->state = ISER_CONN_DOWN; - mutex_lock(&ib_conn->state_mutex); - ib_conn->state = ISER_CONN_DOWN; - mutex_unlock(&ib_conn->state_mutex); + mutex_lock(&iser_conn->state_mutex); + iser_conn->state = ISER_CONN_DOWN; + mutex_unlock(&iser_conn->state_mutex); - iser_conn_release(ib_conn); + iser_conn_release(iser_conn); } /** * Frees all conn objects and deallocs conn descriptor */ -void iser_conn_release(struct iser_conn *ib_conn) +void iser_conn_release(struct iser_conn *iser_conn) { - struct iser_device *device = ib_conn->device; + struct iser_device *device = iser_conn->device; mutex_lock(&ig.connlist_mutex); - list_del(&ib_conn->conn_list); + list_del(&iser_conn->conn_list); mutex_unlock(&ig.connlist_mutex); - mutex_lock(&ib_conn->state_mutex); - BUG_ON(ib_conn->state != ISER_CONN_DOWN); + mutex_lock(&iser_conn->state_mutex); + BUG_ON(iser_conn->state != ISER_CONN_DOWN); - iser_free_rx_descriptors(ib_conn); - iser_free_ib_conn_res(ib_conn); - ib_conn->device = NULL; + iser_free_rx_descriptors(iser_conn); + iser_free_ib_conn_res(iser_conn); + iser_conn->device = NULL; /* on EVENT_ADDR_ERROR there's no device yet for this conn */ if (device != NULL) iser_device_try_release(device); - mutex_unlock(&ib_conn->state_mutex); + mutex_unlock(&iser_conn->state_mutex); /* if cma handler context, the caller actually destroy the id */ - if (ib_conn->cma_id != NULL) { - rdma_destroy_id(ib_conn->cma_id); - ib_conn->cma_id = NULL; + if (iser_conn->cma_id != NULL) { + rdma_destroy_id(iser_conn->cma_id); + iser_conn->cma_id = NULL; } - kfree(ib_conn); + kfree(iser_conn); } /** * triggers start of the disconnect procedures and wait for them to be done */ -void iser_conn_terminate(struct iser_conn *ib_conn) +void iser_conn_terminate(struct iser_conn *iser_conn) { int err = 0; @@ -649,11 +651,11 @@ void iser_conn_terminate(struct iser_conn *ib_conn) * the QP state to ERROR */ - iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); - err = rdma_disconnect(ib_conn->cma_id); + iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); + err = rdma_disconnect(iser_conn->cma_id); if (err) iser_err("Failed to disconnect, conn: 0x%p err %d\n", - ib_conn,err); + iser_conn, err); } /** @@ -661,10 +663,10 @@ void iser_conn_terminate(struct iser_conn *ib_conn) **/ static void iser_connect_error(struct rdma_cm_id *cma_id) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; - ib_conn = (struct iser_conn *)cma_id->context; - ib_conn->state = ISER_CONN_DOWN; + iser_conn = (struct iser_conn *)cma_id->context; + iser_conn->state = ISER_CONN_DOWN; } /** @@ -673,11 +675,11 @@ static void iser_connect_error(struct rdma_cm_id *cma_id) static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; int ret; - ib_conn = (struct iser_conn *)cma_id->context; - if (ib_conn->state != ISER_CONN_PENDING) + iser_conn = (struct iser_conn *)cma_id->context; + if (iser_conn->state != 
ISER_CONN_PENDING) /* bailout */ return; @@ -688,7 +690,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) return; } - ib_conn->device = device; + iser_conn->device = device; /* connection T10-PI support */ if (iser_pi_enable) { @@ -696,10 +698,10 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) IB_DEVICE_SIGNATURE_HANDOVER)) { iser_warn("T10-PI requested but not supported on %s, " "continue without T10-PI\n", - ib_conn->device->ib_device->name); - ib_conn->pi_support = false; + iser_conn->device->ib_device->name); + iser_conn->pi_support = false; } else { - ib_conn->pi_support = true; + iser_conn->pi_support = true; } } @@ -719,10 +721,10 @@ static void iser_route_handler(struct rdma_cm_id *cma_id) struct rdma_conn_param conn_param; int ret; struct iser_cm_hdr req_hdr; - struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context; - struct iser_device *device = ib_conn->device; + struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; + struct iser_device *device = iser_conn->device; - if (ib_conn->state != ISER_CONN_PENDING) + if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; @@ -755,34 +757,34 @@ failure: static void iser_connected_handler(struct rdma_cm_id *cma_id) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; struct ib_qp_attr attr; struct ib_qp_init_attr init_attr; - ib_conn = (struct iser_conn *)cma_id->context; - if (ib_conn->state != ISER_CONN_PENDING) + iser_conn = (struct iser_conn *)cma_id->context; + if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); - ib_conn->state = ISER_CONN_UP; - complete(&ib_conn->up_completion); + iser_conn->state = ISER_CONN_UP; + complete(&iser_conn->up_completion); } static void iser_disconnected_handler(struct rdma_cm_id *cma_id) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; - ib_conn = (struct iser_conn *)cma_id->context; + iser_conn = (struct iser_conn *)cma_id->context; /* getting here when the state is UP means that the conn is being * * terminated asynchronously from the iSCSI layer's perspective. */ - if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, - ISER_CONN_TERMINATING)){ - if (ib_conn->iscsi_conn) - iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); + if (iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, + ISER_CONN_TERMINATING)){ + if (iser_conn->iscsi_conn) + iscsi_conn_failure(iser_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); else iser_err("iscsi_iser connection isn't bound\n"); } @@ -791,21 +793,21 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id) * block also exists in iser_handle_comp_error(), but it is needed here * for cases of no flushes at all, e.g. discovery over rdma. 
*/ - if (ib_conn->post_recv_buf_count == 0 && - (atomic_read(&ib_conn->post_send_buf_count) == 0)) { - complete(&ib_conn->flush_completion); + if (iser_conn->post_recv_buf_count == 0 && + (atomic_read(&iser_conn->post_send_buf_count) == 0)) { + complete(&iser_conn->flush_completion); } } static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; - ib_conn = (struct iser_conn *)cma_id->context; + iser_conn = (struct iser_conn *)cma_id->context; iser_info("event %d status %d conn %p id %p\n", event->event, event->status, cma_id->context, cma_id); - mutex_lock(&ib_conn->state_mutex); + mutex_lock(&iser_conn->state_mutex); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: iser_addr_handler(cma_id); @@ -833,82 +835,82 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve iser_err("Unexpected RDMA CM event (%d)\n", event->event); break; } - mutex_unlock(&ib_conn->state_mutex); + mutex_unlock(&iser_conn->state_mutex); return 0; } -void iser_conn_init(struct iser_conn *ib_conn) +void iser_conn_init(struct iser_conn *iser_conn) { - ib_conn->state = ISER_CONN_INIT; - ib_conn->post_recv_buf_count = 0; - atomic_set(&ib_conn->post_send_buf_count, 0); - init_completion(&ib_conn->stop_completion); - init_completion(&ib_conn->flush_completion); - init_completion(&ib_conn->up_completion); - INIT_LIST_HEAD(&ib_conn->conn_list); - spin_lock_init(&ib_conn->lock); - mutex_init(&ib_conn->state_mutex); + iser_conn->state = ISER_CONN_INIT; + iser_conn->post_recv_buf_count = 0; + atomic_set(&iser_conn->post_send_buf_count, 0); + init_completion(&iser_conn->stop_completion); + init_completion(&iser_conn->flush_completion); + init_completion(&iser_conn->up_completion); + INIT_LIST_HEAD(&iser_conn->conn_list); + spin_lock_init(&iser_conn->lock); + mutex_init(&iser_conn->state_mutex); } /** * starts the process of connecting to the target * sleeps until the connection is established or rejected */ -int iser_connect(struct iser_conn *ib_conn, +int iser_connect(struct iser_conn *iser_conn, struct sockaddr *src_addr, struct sockaddr *dst_addr, int non_blocking) { int err = 0; - mutex_lock(&ib_conn->state_mutex); + mutex_lock(&iser_conn->state_mutex); - sprintf(ib_conn->name, "%pISp", dst_addr); + sprintf(iser_conn->name, "%pISp", dst_addr); - iser_info("connecting to: %s\n", ib_conn->name); + iser_info("connecting to: %s\n", iser_conn->name); /* the device is known only --after-- address resolution */ - ib_conn->device = NULL; + iser_conn->device = NULL; - ib_conn->state = ISER_CONN_PENDING; + iser_conn->state = ISER_CONN_PENDING; - ib_conn->cma_id = rdma_create_id(iser_cma_handler, - (void *)ib_conn, + iser_conn->cma_id = rdma_create_id(iser_cma_handler, + (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC); - if (IS_ERR(ib_conn->cma_id)) { - err = PTR_ERR(ib_conn->cma_id); + if (IS_ERR(iser_conn->cma_id)) { + err = PTR_ERR(iser_conn->cma_id); iser_err("rdma_create_id failed: %d\n", err); goto id_failure; } - err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000); + err = rdma_resolve_addr(iser_conn->cma_id, src_addr, dst_addr, 1000); if (err) { iser_err("rdma_resolve_addr failed: %d\n", err); goto addr_failure; } if (!non_blocking) { - wait_for_completion_interruptible(&ib_conn->up_completion); + wait_for_completion_interruptible(&iser_conn->up_completion); - if (ib_conn->state != ISER_CONN_UP) { + if (iser_conn->state != ISER_CONN_UP) { err = -EIO; goto connect_failure; } } - 
mutex_unlock(&ib_conn->state_mutex); + mutex_unlock(&iser_conn->state_mutex); mutex_lock(&ig.connlist_mutex); - list_add(&ib_conn->conn_list, &ig.connlist); + list_add(&iser_conn->conn_list, &ig.connlist); mutex_unlock(&ig.connlist_mutex); return 0; id_failure: - ib_conn->cma_id = NULL; + iser_conn->cma_id = NULL; addr_failure: - ib_conn->state = ISER_CONN_DOWN; + iser_conn->state = ISER_CONN_DOWN; connect_failure: - mutex_unlock(&ib_conn->state_mutex); - iser_conn_release(ib_conn); + mutex_unlock(&iser_conn->state_mutex); + iser_conn_release(iser_conn); return err; } @@ -917,7 +919,7 @@ connect_failure: * * returns: 0 on success, errno code on failure */ -int iser_reg_page_vec(struct iser_conn *ib_conn, +int iser_reg_page_vec(struct iser_conn *iser_conn, struct iser_page_vec *page_vec, struct iser_mem_reg *mem_reg) { @@ -929,7 +931,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, page_list = page_vec->pages; io_addr = page_list[0]; - mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool, + mem = ib_fmr_pool_map_phys(iser_conn->fmr.pool, page_list, page_vec->length, io_addr); @@ -987,7 +989,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; - struct iser_conn *ib_conn = iser_task->ib_conn; + struct iser_conn *iser_conn = iser_task->iser_conn; struct fast_reg_descriptor *desc = reg->mem_h; if (!reg->is_mr) @@ -995,61 +997,61 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, reg->mem_h = NULL; reg->is_mr = 0; - spin_lock_bh(&ib_conn->lock); - list_add_tail(&desc->list, &ib_conn->fastreg.pool); - spin_unlock_bh(&ib_conn->lock); + spin_lock_bh(&iser_conn->lock); + list_add_tail(&desc->list, &iser_conn->fastreg.pool); + spin_unlock_bh(&iser_conn->lock); } -int iser_post_recvl(struct iser_conn *ib_conn) +int iser_post_recvl(struct iser_conn *iser_conn) { struct ib_recv_wr rx_wr, *rx_wr_failed; struct ib_sge sge; int ib_ret; - sge.addr = ib_conn->login_resp_dma; + sge.addr = iser_conn->login_resp_dma; sge.length = ISER_RX_LOGIN_SIZE; - sge.lkey = ib_conn->device->mr->lkey; + sge.lkey = iser_conn->device->mr->lkey; - rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf; + rx_wr.wr_id = (unsigned long)iser_conn->login_resp_buf; rx_wr.sg_list = &sge; rx_wr.num_sge = 1; rx_wr.next = NULL; - ib_conn->post_recv_buf_count++; - ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); + iser_conn->post_recv_buf_count++; + ib_ret = ib_post_recv(iser_conn->qp, &rx_wr, &rx_wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); - ib_conn->post_recv_buf_count--; + iser_conn->post_recv_buf_count--; } return ib_ret; } -int iser_post_recvm(struct iser_conn *ib_conn, int count) +int iser_post_recvm(struct iser_conn *iser_conn, int count) { struct ib_recv_wr *rx_wr, *rx_wr_failed; int i, ib_ret; - unsigned int my_rx_head = ib_conn->rx_desc_head; + unsigned int my_rx_head = iser_conn->rx_desc_head; struct iser_rx_desc *rx_desc; - for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { - rx_desc = &ib_conn->rx_descs[my_rx_head]; + for (rx_wr = iser_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { + rx_desc = &iser_conn->rx_descs[my_rx_head]; rx_wr->wr_id = (unsigned long)rx_desc; rx_wr->sg_list = &rx_desc->rx_sg; rx_wr->num_sge = 1; rx_wr->next = rx_wr + 1; - my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask; + my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask; } rx_wr--; rx_wr->next = NULL; /* mark end of work requests list */ - 
ib_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); + iser_conn->post_recv_buf_count += count; + ib_ret = ib_post_recv(iser_conn->qp, iser_conn->rx_wr, &rx_wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); - ib_conn->post_recv_buf_count -= count; + iser_conn->post_recv_buf_count -= count; } else - ib_conn->rx_desc_head = my_rx_head; + iser_conn->rx_desc_head = my_rx_head; return ib_ret; } @@ -1059,13 +1061,14 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count) * * returns 0 on success, -1 on failure */ -int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc) +int iser_post_send(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc) { int ib_ret; struct ib_send_wr send_wr, *send_wr_failed; - ib_dma_sync_single_for_device(ib_conn->device->ib_device, - tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); + ib_dma_sync_single_for_device(iser_conn->device->ib_device, + tx_desc->dma_addr, ISER_HEADERS_LEN, + DMA_TO_DEVICE); send_wr.next = NULL; send_wr.wr_id = (unsigned long)tx_desc; @@ -1074,37 +1077,37 @@ int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc) send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - atomic_inc(&ib_conn->post_send_buf_count); + atomic_inc(&iser_conn->post_send_buf_count); - ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); + ib_ret = ib_post_send(iser_conn->qp, &send_wr, &send_wr_failed); if (ib_ret) { iser_err("ib_post_send failed, ret:%d\n", ib_ret); - atomic_dec(&ib_conn->post_send_buf_count); + atomic_dec(&iser_conn->post_send_buf_count); } return ib_ret; } static void iser_handle_comp_error(struct iser_tx_desc *desc, - struct iser_conn *ib_conn) + struct iser_conn *iser_conn) { if (desc && desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, desc); - if (ib_conn->post_recv_buf_count == 0 && - atomic_read(&ib_conn->post_send_buf_count) == 0) { + if (iser_conn->post_recv_buf_count == 0 && + atomic_read(&iser_conn->post_send_buf_count) == 0) { /** * getting here when the state is UP means that the conn is * being terminated asynchronously from the iSCSI layer's * perspective. It is safe to peek at the connection state * since iscsi_conn_failure is allowed to be called twice. 
**/ - if (ib_conn->state == ISER_CONN_UP) - iscsi_conn_failure(ib_conn->iscsi_conn, + if (iser_conn->state == ISER_CONN_UP) + iscsi_conn_failure(iser_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); /* no more non completed posts to the QP, complete the * termination process w.o worrying on disconnect event */ - complete(&ib_conn->flush_completion); + complete(&iser_conn->flush_completion); } } @@ -1113,15 +1116,15 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) struct ib_cq *cq = device->tx_cq[cq_index]; struct ib_wc wc; struct iser_tx_desc *tx_desc; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; int completed_tx = 0; while (ib_poll_cq(cq, 1, &wc) == 1) { tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id; - ib_conn = wc.qp->qp_context; + iser_conn = wc.qp->qp_context; if (wc.status == IB_WC_SUCCESS) { if (wc.opcode == IB_WC_SEND) - iser_snd_completion(tx_desc, ib_conn); + iser_snd_completion(tx_desc, iser_conn); else iser_err("expected opcode %d got %d\n", IB_WC_SEND, wc.opcode); @@ -1129,8 +1132,8 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) iser_err("tx id %llx status %d vend_err %x\n", wc.wr_id, wc.status, wc.vendor_err); if (wc.wr_id != ISER_FASTREG_LI_WRID) { - atomic_dec(&ib_conn->post_send_buf_count); - iser_handle_comp_error(tx_desc, ib_conn); + atomic_dec(&iser_conn->post_send_buf_count); + iser_handle_comp_error(tx_desc, iser_conn); } } completed_tx++; @@ -1148,7 +1151,7 @@ static void iser_cq_tasklet_fn(unsigned long data) struct ib_wc wc; struct iser_rx_desc *desc; unsigned long xfer_len; - struct iser_conn *ib_conn; + struct iser_conn *iser_conn; int completed_tx, completed_rx = 0; /* First do tx drain, so in a case where we have rx flushes and a successful @@ -1159,11 +1162,11 @@ static void iser_cq_tasklet_fn(unsigned long data) while (ib_poll_cq(cq, 1, &wc) == 1) { desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; BUG_ON(desc == NULL); - ib_conn = wc.qp->qp_context; + iser_conn = wc.qp->qp_context; if (wc.status == IB_WC_SUCCESS) { if (wc.opcode == IB_WC_RECV) { xfer_len = (unsigned long)wc.byte_len; - iser_rcv_completion(desc, xfer_len, ib_conn); + iser_rcv_completion(desc, xfer_len, iser_conn); } else iser_err("expected opcode %d got %d\n", IB_WC_RECV, wc.opcode); @@ -1171,8 +1174,8 @@ static void iser_cq_tasklet_fn(unsigned long data) if (wc.status != IB_WC_WR_FLUSH_ERR) iser_err("rx id %llx status %d vend_err %x\n", wc.wr_id, wc.status, wc.vendor_err); - ib_conn->post_recv_buf_count--; - iser_handle_comp_error(NULL, ib_conn); + iser_conn->post_recv_buf_count--; + iser_handle_comp_error(NULL, iser_conn); } completed_rx++; if (!(completed_rx & 63)) -- cgit v1.2.3 From a4ee3539f6e2955815b93350bbce01e8915d27f8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:01:58 +0300 Subject: IB/iser: Re-introduce ib_conn Structure that describes the RDMA related connection objects. Static member of iser_conn.
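To make the intended layout concrete, here is a minimal user-space sketch (illustrative only, not driver code: the struct fields are trimmed placeholders, and container_of() is the standard kernel macro, redefined here so the sketch compiles on its own):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_conn {			/* RDMA-layout representation */
	void *cma_id;
	void *qp;
};

struct iser_conn {			/* logical connection representation */
	struct ib_conn ib_conn;		/* static member, no separate allocation */
	int state;
};

/* Completion paths that hold only the ib_conn (e.g. taken from
 * qp_context) can recover the owning iser_conn without a back-pointer: */
static struct iser_conn *to_iser_conn(struct ib_conn *ib_conn)
{
	return container_of(ib_conn, struct iser_conn, ib_conn);
}

int main(void)
{
	struct iser_conn conn = { .state = 0 };

	/* round-trip: embedded member back to its container */
	printf("%d\n", to_iser_conn(&conn.ib_conn) == &conn);
	return 0;
}

This container_of() recovery is the pattern the hunks below use in the completion handlers, and it is what lets ib_conn stay statically embedded instead of being allocated per connection.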
This patch does not change any functionality Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 18 ++- drivers/infiniband/ulp/iser/iscsi_iser.h | 85 ++++++---- drivers/infiniband/ulp/iser/iser_initiator.c | 63 ++++---- drivers/infiniband/ulp/iser/iser_memory.c | 52 +++--- drivers/infiniband/ulp/iser/iser_verbs.c | 231 ++++++++++++++------------- 5 files changed, 245 insertions(+), 204 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 1f3ad2b13ae2..db83530184f8 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -148,7 +148,7 @@ int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc) { struct iser_conn *iser_conn = task->conn->dd_data; - struct iser_device *device = iser_conn->device; + struct iser_device *device = iser_conn->ib_conn.device; struct iscsi_iser_task *iser_task = task->dd_data; u64 dma_addr; @@ -291,7 +291,7 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *tx_desc = &iser_task->desc; struct iser_conn *iser_conn = task->conn->dd_data; - struct iser_device *device = iser_conn->device; + struct iser_device *device = iser_conn->ib_conn.device; ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -448,6 +448,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct iscsi_session *session; struct Scsi_Host *shost; struct iser_conn *iser_conn = NULL; + struct ib_conn *ib_conn; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) @@ -465,8 +466,9 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, */ if (ep) { iser_conn = ep->dd_data; - if (iser_conn->pi_support) { - u32 sig_caps = iser_conn->device->dev_attr.sig_prot_cap; + ib_conn = &iser_conn->ib_conn; + if (ib_conn->pi_support) { + u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); if (iser_pi_guard) @@ -477,7 +479,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } if (iscsi_host_add(shost, ep ? 
- iser_conn->device->ib_device->dma_device : NULL)) + ib_conn->device->ib_device->dma_device : NULL)) goto free_host; if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) { @@ -583,12 +585,12 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, switch (param) { case ISCSI_PARAM_CONN_PORT: case ISCSI_PARAM_CONN_ADDRESS: - if (!iser_conn || !iser_conn->cma_id) + if (!iser_conn || !iser_conn->ib_conn.cma_id) return -ENOTCONN; return iscsi_conn_get_addr_param((struct sockaddr_storage *) - &iser_conn->cma_id->route.addr.dst_addr, - param, buf); + &iser_conn->ib_conn.cma_id->route.addr.dst_addr, + param, buf); break; default: return -ENOSYS; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index ec34b8f7d385..4ad73c91e531 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -265,6 +265,7 @@ struct iser_rx_desc { #define ISER_MAX_CQ 4 struct iser_conn; +struct ib_conn; struct iscsi_iser_task; struct iser_device { @@ -281,9 +282,9 @@ struct iser_device { int cq_active_qps[ISER_MAX_CQ]; int cqs_used; struct iser_cq_desc *cq_desc; - int (*iser_alloc_rdma_reg_res)(struct iser_conn *iser_conn, + int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn, unsigned cmds_max); - void (*iser_free_rdma_reg_res)(struct iser_conn *iser_conn); + void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn); int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task, @@ -317,20 +318,57 @@ struct fast_reg_descriptor { u8 reg_indicators; }; +/** + * struct ib_conn - InfiniBand related objects + * + * @cma_id: rdma_cm connection manager handle + * @qp: Connection Queue-pair + * @post_recv_buf_count: post receive counter + * @post_send_buf_count: post send counter + * @rx_wr: receive work request for batch posts + * @device: reference to iser device + * @pi_support: Indicate device T10-PI support + * @lock: protects fmr/fastreg pool + * @union.fmr: + * @pool: FMR pool for fast registrations + * @page_vec: page vector to hold mapped command pages + * used for registration + * @union.fastreg: + * @pool: Fast registration descriptors pool for fast + * registrations + * @pool_size: Size of pool + */ +struct ib_conn { + struct rdma_cm_id *cma_id; + struct ib_qp *qp; + int post_recv_buf_count; + atomic_t post_send_buf_count; + struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; + struct iser_device *device; + int cq_index; + bool pi_support; + spinlock_t lock; + union { + struct { + struct ib_fmr_pool *pool; + struct iser_page_vec *page_vec; + } fmr; + struct { + struct list_head pool; + int pool_size; + } fastreg; + }; +}; + struct iser_conn { + struct ib_conn ib_conn; struct iscsi_conn *iscsi_conn; struct iscsi_endpoint *ep; enum iser_conn_state state; /* rdma connection state */ atomic_t refcount; - spinlock_t lock; /* used for state changes */ - struct iser_device *device; /* device context */ - struct rdma_cm_id *cma_id; /* CMA ID */ - struct ib_qp *qp; /* QP */ unsigned qp_max_recv_dtos; /* num of rx buffers */ unsigned qp_max_recv_dtos_mask; /* above minus 1 */ unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ - int post_recv_buf_count; /* posted rx count */ - atomic_t post_send_buf_count; /* posted tx count */ char name[ISER_OBJECT_NAME_SIZE]; struct work_struct release_work; struct completion stop_completion; @@ -344,21 +382,6 @@ struct iser_conn { u64 login_req_dma, login_resp_dma; unsigned int rx_desc_head; struct iser_rx_desc *rx_descs;
- struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; - bool pi_support; - - /* Connection memory registration pool */ - union { - struct { - struct ib_fmr_pool *pool; /* pool of IB FMRs */ - struct iser_page_vec *page_vec; /* represents SG to fmr maps* - * maps serialized as tx is*/ - } fmr; - struct { - struct list_head pool; - int pool_size; - } fastreg; - }; }; struct iscsi_iser_task { @@ -429,10 +452,10 @@ void iser_release_work(struct work_struct *work); void iser_rcv_completion(struct iser_rx_desc *desc, unsigned long dto_xfer_len, - struct iser_conn *iser_conn); + struct ib_conn *ib_conn); void iser_snd_completion(struct iser_tx_desc *desc, - struct iser_conn *iser_conn); + struct ib_conn *ib_conn); void iser_task_rdma_init(struct iscsi_iser_task *task); @@ -455,7 +478,7 @@ int iser_connect(struct iser_conn *iser_conn, struct sockaddr *dst_addr, int non_blocking); -int iser_reg_page_vec(struct iser_conn *iser_conn, +int iser_reg_page_vec(struct ib_conn *ib_conn, struct iser_page_vec *page_vec, struct iser_mem_reg *mem_reg); @@ -466,7 +489,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, int iser_post_recvl(struct iser_conn *iser_conn); int iser_post_recvm(struct iser_conn *iser_conn, int count); -int iser_post_send(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc); +int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc); int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data, @@ -479,10 +502,10 @@ int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, struct iscsi_session *session); -int iser_create_fmr_pool(struct iser_conn *iser_conn, unsigned cmds_max); -void iser_free_fmr_pool(struct iser_conn *iser_conn); -int iser_create_fastreg_pool(struct iser_conn *iser_conn, unsigned cmds_max); -void iser_free_fastreg_pool(struct iser_conn *iser_conn); +int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max); +void iser_free_fmr_pool(struct ib_conn *ib_conn); +int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max); +void iser_free_fastreg_pool(struct ib_conn *ib_conn); u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 1f53ccb31534..123174570c16 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -49,7 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->iser_conn->device; + struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -103,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->iser_conn->device; + struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -163,7 +163,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, static void iser_create_send_desc(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc) { - struct iser_device *device = iser_conn->device; + struct iser_device *device = 
iser_conn->ib_conn.device; ib_dma_sync_single_for_cpu(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -181,16 +181,18 @@ static void iser_create_send_desc(struct iser_conn *iser_conn, static void iser_free_login_buf(struct iser_conn *iser_conn) { + struct iser_device *device = iser_conn->ib_conn.device; + if (!iser_conn->login_buf) return; if (iser_conn->login_req_dma) - ib_dma_unmap_single(iser_conn->device->ib_device, + ib_dma_unmap_single(device->ib_device, iser_conn->login_req_dma, ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); if (iser_conn->login_resp_dma) - ib_dma_unmap_single(iser_conn->device->ib_device, + ib_dma_unmap_single(device->ib_device, iser_conn->login_resp_dma, ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); @@ -204,12 +206,10 @@ static void iser_free_login_buf(struct iser_conn *iser_conn) static int iser_alloc_login_buf(struct iser_conn *iser_conn) { - struct iser_device *device; + struct iser_device *device = iser_conn->ib_conn.device; int req_err, resp_err; - BUG_ON(iser_conn->device == NULL); - - device = iser_conn->device; + BUG_ON(device == NULL); iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE, GFP_KERNEL); @@ -259,13 +259,14 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, u64 dma_addr; struct iser_rx_desc *rx_desc; struct ib_sge *rx_sg; - struct iser_device *device = iser_conn->device; + struct ib_conn *ib_conn = &iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; iser_conn->qp_max_recv_dtos = session->cmds_max; iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2; - if (device->iser_alloc_rdma_reg_res(iser_conn, session->scsi_cmds_max)) + if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max)) goto create_rdma_reg_res_failed; if (iser_alloc_login_buf(iser_conn)) @@ -305,7 +306,7 @@ rx_desc_dma_map_failed: rx_desc_alloc_fail: iser_free_login_buf(iser_conn); alloc_login_buf_fail: - device->iser_free_rdma_reg_res(iser_conn); + device->iser_free_rdma_reg_res(ib_conn); create_rdma_reg_res_failed: iser_err("failed allocating rx descriptors / data buffers\n"); return -ENOMEM; @@ -315,13 +316,14 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn) { int i; struct iser_rx_desc *rx_desc; - struct iser_device *device = iser_conn->device; + struct ib_conn *ib_conn = &iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; if (!iser_conn->rx_descs) goto free_login_buf; if (device->iser_free_rdma_reg_res) - device->iser_free_rdma_reg_res(iser_conn); + device->iser_free_rdma_reg_res(ib_conn); rx_desc = iser_conn->rx_descs; for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) @@ -338,6 +340,7 @@ free_login_buf: static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) { struct iser_conn *iser_conn = conn->dd_data; + struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iscsi_session *session = conn->session; iser_dbg("req op %x flags %x\n", req->opcode, req->flags); @@ -350,8 +353,8 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) * response) and no posted send buffers left - they must have been * consumed during previous login phases. 
*/ - WARN_ON(iser_conn->post_recv_buf_count != 1); - WARN_ON(atomic_read(&iser_conn->post_send_buf_count) != 0); + WARN_ON(ib_conn->post_recv_buf_count != 1); + WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0); if (session->discovery_sess) { iser_info("Discovery session, re-using login RX buffer\n"); @@ -426,7 +429,7 @@ int iser_send_command(struct iscsi_conn *conn, iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(iser_conn, tx_desc); + err = iser_post_send(&iser_conn->ib_conn, tx_desc); if (!err) return 0; @@ -491,7 +494,7 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(iser_conn, tx_desc); + err = iser_post_send(&iser_conn->ib_conn, tx_desc); if (!err) return 0; @@ -515,7 +518,7 @@ int iser_send_control(struct iscsi_conn *conn, mdesc->type = ISCSI_TX_CONTROL; iser_create_send_desc(iser_conn, mdesc); - device = iser_conn->device; + device = iser_conn->ib_conn.device; data_seg_len = ntoh24(task->hdr->dlength); @@ -553,7 +556,7 @@ int iser_send_control(struct iscsi_conn *conn, goto send_control_error; } - err = iser_post_send(iser_conn, mdesc); + err = iser_post_send(&iser_conn->ib_conn, mdesc); if (!err) return 0; @@ -567,8 +570,10 @@ send_control_error: */ void iser_rcv_completion(struct iser_rx_desc *rx_desc, unsigned long rx_xfer_len, - struct iser_conn *iser_conn) + struct ib_conn *ib_conn) { + struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, + ib_conn); struct iscsi_hdr *hdr; u64 rx_dma; int rx_buflen, outstanding, count, err; @@ -582,7 +587,7 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, rx_buflen = ISER_RX_PAYLOAD_SIZE; } - ib_dma_sync_single_for_cpu(iser_conn->device->ib_device, rx_dma, + ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, rx_buflen, DMA_FROM_DEVICE); hdr = &rx_desc->iscsi_header; @@ -593,19 +598,19 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN); - ib_dma_sync_single_for_device(iser_conn->device->ib_device, rx_dma, + ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, rx_buflen, DMA_FROM_DEVICE); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task eliminates the need to worry on tasks which are completed in * * parallel to the execution of iser_conn_term. 
So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ - iser_conn->post_recv_buf_count--; + ib_conn->post_recv_buf_count--; if (rx_dma == iser_conn->login_resp_dma) return; - outstanding = iser_conn->post_recv_buf_count; + outstanding = ib_conn->post_recv_buf_count; if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { count = min(iser_conn->qp_max_recv_dtos - outstanding, iser_conn->min_posted_rx); @@ -616,10 +621,10 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, } void iser_snd_completion(struct iser_tx_desc *tx_desc, - struct iser_conn *iser_conn) + struct ib_conn *ib_conn) { struct iscsi_task *task; - struct iser_device *device = iser_conn->device; + struct iser_device *device = ib_conn->device; if (tx_desc->type == ISCSI_TX_DATAOUT) { ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, @@ -628,7 +633,7 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc, tx_desc = NULL; } - atomic_dec(&iser_conn->post_send_buf_count); + atomic_dec(&ib_conn->post_send_buf_count); if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ @@ -661,7 +666,7 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - struct iser_device *device = iser_task->iser_conn->device; + struct iser_device *device = iser_task->iser_conn->ib_conn.device; int is_rdma_data_aligned = 1; int is_rdma_prot_aligned = 1; int prot_count = scsi_prot_sg_count(iser_task->sc); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index ba09fbbe765e..de4db762dc77 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -49,7 +49,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct iser_data_buf *data_copy, enum iser_data_dir cmd_dir) { - struct ib_device *dev = iser_task->iser_conn->device->ib_device; + struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device; struct scatterlist *sgl = (struct scatterlist *)data->buf; struct scatterlist *sg; char *mem = NULL; @@ -116,7 +116,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct ib_device *dev; unsigned long cmd_data_len; - dev = iser_task->iser_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn.device->ib_device; ib_dma_unmap_sg(dev, &data_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? 
@@ -322,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct ib_device *dev; iser_task->dir[iser_dir] = 1; - dev = iser_task->iser_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn.device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); if (data->dma_nents == 0) { @@ -337,7 +337,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, { struct ib_device *dev; - dev = iser_task->iser_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn.device->ib_device; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } @@ -377,8 +377,8 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_conn *iser_conn = iser_task->iser_conn; - struct iser_device *device = iser_conn->device; + struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_regd_buf *regd_buf; @@ -418,8 +418,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, (unsigned long)regd_buf->reg.va, (unsigned long)regd_buf->reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, iser_conn->fmr.page_vec, ibdev); - err = iser_reg_page_vec(iser_conn, iser_conn->fmr.page_vec, + iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev); + err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec, &regd_buf->reg); if (err && err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); @@ -427,12 +427,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, mem->dma_nents, ntoh24(iser_task->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", - iser_conn->fmr.page_vec->data_size, - iser_conn->fmr.page_vec->length, - iser_conn->fmr.page_vec->offset); - for (i = 0; i < iser_conn->fmr.page_vec->length; i++) + ib_conn->fmr.page_vec->data_size, + ib_conn->fmr.page_vec->length, + ib_conn->fmr.page_vec->offset); + for (i = 0; i < ib_conn->fmr.page_vec->length; i++) iser_err("page_vec[%d] = 0x%llx\n", i, - (unsigned long long)iser_conn->fmr.page_vec->pages[i]); + (unsigned long long)ib_conn->fmr.page_vec->pages[i]); } if (err) return err; @@ -533,7 +533,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, struct fast_reg_descriptor *desc, struct ib_sge *data_sge, struct ib_sge *prot_sge, struct ib_sge *sig_sge) { - struct iser_conn *iser_conn = iser_task->iser_conn; + struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; struct iser_pi_context *pi_ctx = desc->pi_ctx; struct ib_send_wr sig_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; @@ -579,7 +579,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, else wr->next = &sig_wr; - ret = ib_post_send(iser_conn->qp, wr, &bad_wr); + ret = ib_post_send(ib_conn->qp, wr, &bad_wr); if (ret) { iser_err("reg_sig_mr failed, ret:%d\n", ret); goto err; @@ -609,8 +609,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct ib_sge *sge) { struct fast_reg_descriptor *desc = regd_buf->reg.mem_h; - struct iser_conn *iser_conn = iser_task->iser_conn; - struct iser_device *device = iser_conn->device; + struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct ib_mr *mr; struct ib_fast_reg_page_list *frpl; @@ -677,7 +677,7 @@ static int
iser_fast_reg_mr(struct iscsi_iser_task *iser_task, else wr->next = &fastreg_wr; - ret = ib_post_send(iser_conn->qp, wr, &bad_wr); + ret = ib_post_send(ib_conn->qp, wr, &bad_wr); if (ret) { iser_err("fast registration failed, ret:%d\n", ret); return ret; @@ -700,8 +700,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_conn *iser_conn = iser_task->iser_conn; - struct iser_device *device = iser_conn->device; + struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir]; @@ -724,11 +724,11 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, if (mem->dma_nents != 1 || scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { - spin_lock_irqsave(&iser_conn->lock, flags); - desc = list_first_entry(&iser_conn->fastreg.pool, struct fast_reg_descriptor, list); list_del(&desc->list); - spin_unlock_irqrestore(&iser_conn->lock, flags); + spin_lock_irqsave(&ib_conn->lock, flags); + desc = list_first_entry(&ib_conn->fastreg.pool, struct fast_reg_descriptor, list); list_del(&desc->list); + spin_unlock_irqrestore(&ib_conn->lock, flags); regd_buf->reg.mem_h = desc; } @@ -791,9 +791,9 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, return 0; err_reg: if (desc) { - spin_lock_irqsave(&iser_conn->lock, flags); - list_add_tail(&desc->list, &iser_conn->fastreg.pool); - spin_unlock_irqrestore(&iser_conn->lock, flags); + spin_lock_irqsave(&ib_conn->lock, flags); + list_add_tail(&desc->list, &ib_conn->fastreg.pool); + spin_unlock_irqrestore(&ib_conn->lock, flags); } return err; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 778c166916fe..e69aba8eabec 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -213,19 +213,19 @@ static void iser_free_device_ib_res(struct iser_device *device) * * returns 0 on success, or errno code on failure */ -int iser_create_fmr_pool(struct iser_conn *iser_conn, unsigned cmds_max) +int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max) { - struct iser_device *device = iser_conn->device; + struct iser_device *device = ib_conn->device; struct ib_fmr_pool_param params; int ret = -ENOMEM; - iser_conn->fmr.page_vec = kmalloc(sizeof(*iser_conn->fmr.page_vec) + + ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) + (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), GFP_KERNEL); - if (!iser_conn->fmr.page_vec) + if (!ib_conn->fmr.page_vec) return ret; - iser_conn->fmr.page_vec->pages = (u64 *)(iser_conn->fmr.page_vec + 1); + ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1); params.page_shift = SHIFT_4K; /* when the first/last SG element are not start/end * @@ -241,16 +241,16 @@ int iser_create_fmr_pool(struct iser_conn *iser_conn, unsigned cmds_max) IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); - iser_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params); - if (!IS_ERR(iser_conn->fmr.pool)) + ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params); + if (!IS_ERR(ib_conn->fmr.pool)) return 0; /* no FMR => no need for page_vec */ - kfree(iser_conn->fmr.page_vec); - iser_conn->fmr.page_vec = NULL; + kfree(ib_conn->fmr.page_vec); + ib_conn->fmr.page_vec = NULL; - ret =
PTR_ERR(ib_conn->fmr.pool); + ib_conn->fmr.pool = NULL; if (ret != -ENOSYS) { iser_err("FMR allocation failed, err %d\n", ret); return ret; @@ -263,18 +263,18 @@ int iser_create_fmr_pool(struct iser_conn *iser_conn, unsigned cmds_max) /** * iser_free_fmr_pool - releases the FMR pool and page vec */ -void iser_free_fmr_pool(struct iser_conn *iser_conn) +void iser_free_fmr_pool(struct ib_conn *ib_conn) { iser_info("freeing conn %p fmr pool %p\n", - iser_conn, iser_conn->fmr.pool); + ib_conn, ib_conn->fmr.pool); - if (iser_conn->fmr.pool != NULL) - ib_destroy_fmr_pool(iser_conn->fmr.pool); + if (ib_conn->fmr.pool != NULL) + ib_destroy_fmr_pool(ib_conn->fmr.pool); - iser_conn->fmr.pool = NULL; + ib_conn->fmr.pool = NULL; - kfree(iser_conn->fmr.page_vec); - iser_conn->fmr.page_vec = NULL; + kfree(ib_conn->fmr.page_vec); + ib_conn->fmr.page_vec = NULL; } static int @@ -367,14 +367,14 @@ fast_reg_mr_failure: * for fast registration work requests. * returns 0 on success, or errno code on failure */ -int iser_create_fastreg_pool(struct iser_conn *iser_conn, unsigned cmds_max) +int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max) { - struct iser_device *device = iser_conn->device; - struct fast_reg_descriptor *desc; + struct iser_device *device = ib_conn->device; + struct fast_reg_descriptor *desc; int i, ret; - INIT_LIST_HEAD(&iser_conn->fastreg.pool); - iser_conn->fastreg.pool_size = 0; + INIT_LIST_HEAD(&ib_conn->fastreg.pool); + ib_conn->fastreg.pool_size = 0; for (i = 0; i < cmds_max; i++) { desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) { @@ -384,7 +384,7 @@ int iser_create_fastreg_pool(struct iser_conn *iser_conn, unsigned cmds_max) } ret = iser_create_fastreg_desc(device->ib_device, device->pd, - iser_conn->pi_support, desc); + ib_conn->pi_support, desc); if (ret) { iser_err("Failed to create fastreg descriptor err=%d\n", ret); @@ -392,31 +392,31 @@ int iser_create_fastreg_pool(struct iser_conn *iser_conn, unsigned cmds_max) goto err; } - list_add_tail(&desc->list, &iser_conn->fastreg.pool); - iser_conn->fastreg.pool_size++; + list_add_tail(&desc->list, &ib_conn->fastreg.pool); + ib_conn->fastreg.pool_size++; } return 0; err: - iser_free_fastreg_pool(iser_conn); + iser_free_fastreg_pool(ib_conn); return ret; } /** * iser_free_fastreg_pool - releases the pool of fast_reg descriptors */ -void iser_free_fastreg_pool(struct iser_conn *iser_conn) +void iser_free_fastreg_pool(struct ib_conn *ib_conn) { struct fast_reg_descriptor *desc, *tmp; int i = 0; - if (list_empty(&iser_conn->fastreg.pool)) + if (list_empty(&ib_conn->fastreg.pool)) return; - iser_info("freeing conn %p fr pool\n", iser_conn); + iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &iser_conn->fastreg.pool, list) { + list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) { list_del(&desc->list); ib_free_fast_reg_page_list(desc->data_frpl); ib_dereg_mr(desc->data_mr); @@ -430,9 +430,9 @@ void iser_free_fastreg_pool(struct iser_conn *iser_conn) ++i; } - if (i < iser_conn->fastreg.pool_size) + if (i < ib_conn->fastreg.pool_size) iser_warn("pool still has %d regions registered\n", - iser_conn->fastreg.pool_size - i); + ib_conn->fastreg.pool_size - i); } /** @@ -440,16 +440,16 @@ void iser_free_fastreg_pool(struct iser_conn *iser_conn) * * returns 0 on success, -1 on failure */ -static int iser_create_ib_conn_res(struct iser_conn *iser_conn) +static int iser_create_ib_conn_res(struct ib_conn *ib_conn) { struct iser_device *device; struct ib_qp_init_attr init_attr; 
int ret = -ENOMEM; int index, min_index = 0; - BUG_ON(iser_conn->device == NULL); + BUG_ON(ib_conn->device == NULL); - device = iser_conn->device; + device = ib_conn->device; memset(&init_attr, 0, sizeof init_attr); @@ -460,11 +460,12 @@ static int iser_create_ib_conn_res(struct iser_conn *iser_conn) device->cq_active_qps[min_index]) min_index = index; device->cq_active_qps[min_index]++; + ib_conn->cq_index = min_index; mutex_unlock(&ig.connlist_mutex); - iser_info("cq index %d used for iser_conn %p\n", min_index, iser_conn); + iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); init_attr.event_handler = iser_qp_event_callback; - init_attr.qp_context = (void *)iser_conn; + init_attr.qp_context = (void *)ib_conn; init_attr.send_cq = device->tx_cq[min_index]; init_attr.recv_cq = device->rx_cq[min_index]; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; @@ -472,21 +473,21 @@ static int iser_create_ib_conn_res(struct iser_conn *iser_conn) init_attr.cap.max_recv_sge = 1; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; - if (iser_conn->pi_support) { + if (ib_conn->pi_support) { init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS; init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; } else { init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; } - ret = rdma_create_qp(iser_conn->cma_id, device->pd, &init_attr); + ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); if (ret) goto out_err; - iser_conn->qp = iser_conn->cma_id->qp; + ib_conn->qp = ib_conn->cma_id->qp; iser_info("setting conn %p cma_id %p qp %p\n", - iser_conn, iser_conn->cma_id, - iser_conn->cma_id->qp); + ib_conn, ib_conn->cma_id, + ib_conn->cma_id->qp); return ret; out_err: @@ -499,23 +500,20 @@ out_err: */ static void iser_free_ib_conn_res(struct iser_conn *iser_conn) { - int cq_index; - BUG_ON(iser_conn == NULL); + struct ib_conn *ib_conn = &iser_conn->ib_conn; iser_info("freeing conn %p cma_id %p qp %p\n", - iser_conn, iser_conn->cma_id, - iser_conn->qp); + ib_conn, ib_conn->cma_id, + ib_conn->qp); /* qp is created only once both addr & route are resolved */ - if (iser_conn->qp != NULL) { - cq_index = ((struct iser_cq_desc *)iser_conn->qp->recv_cq->cq_context)->cq_index; - iser_conn->device->cq_active_qps[cq_index]--; - - rdma_destroy_qp(iser_conn->cma_id); + if (ib_conn->qp != NULL) { + ib_conn->device->cq_active_qps[ib_conn->cq_index]--; + rdma_destroy_qp(ib_conn->cma_id); } - iser_conn->qp = NULL; + ib_conn->qp = NULL; } /** @@ -614,7 +612,8 @@ void iser_release_work(struct work_struct *work) */ void iser_conn_release(struct iser_conn *iser_conn) { - struct iser_device *device = iser_conn->device; + struct ib_conn *ib_conn = &iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; mutex_lock(&ig.connlist_mutex); list_del(&iser_conn->conn_list); @@ -625,17 +624,17 @@ void iser_conn_release(struct iser_conn *iser_conn) iser_free_rx_descriptors(iser_conn); iser_free_ib_conn_res(iser_conn); - iser_conn->device = NULL; + ib_conn->device = NULL; /* on EVENT_ADDR_ERROR there's no device yet for this conn */ if (device != NULL) iser_device_try_release(device); mutex_unlock(&iser_conn->state_mutex); - /* if cma handler context, the caller actually destroy the id */ - if (iser_conn->cma_id != NULL) { - rdma_destroy_id(iser_conn->cma_id); - iser_conn->cma_id = NULL; + if (ib_conn->cma_id != NULL) { + rdma_destroy_id(ib_conn->cma_id); + ib_conn->cma_id = NULL; } + kfree(iser_conn); } @@ -644,6 +643,7 @@ void iser_conn_release(struct iser_conn *iser_conn) */ void 
iser_conn_terminate(struct iser_conn *iser_conn) { + struct ib_conn *ib_conn = &iser_conn->ib_conn; int err = 0; /* change the ib conn state only if the conn is UP, however always call @@ -652,7 +652,7 @@ void iser_conn_terminate(struct iser_conn *iser_conn) */ iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); - err = rdma_disconnect(iser_conn->cma_id); + err = rdma_disconnect(ib_conn->cma_id); if (err) iser_err("Failed to disconnect, conn: 0x%p err %d\n", iser_conn, err); @@ -676,6 +676,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; struct iser_conn *iser_conn; + struct ib_conn *ib_conn; int ret; iser_conn = (struct iser_conn *)cma_id->context; @@ -683,6 +684,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) /* bailout */ return; + ib_conn = &iser_conn->ib_conn; device = iser_device_find_by_ib_device(cma_id); if (!device) { iser_err("device lookup/creation failed\n"); @@ -690,7 +692,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) return; } - iser_conn->device = device; + ib_conn->device = device; /* connection T10-PI support */ if (iser_pi_enable) { @@ -698,10 +700,10 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) IB_DEVICE_SIGNATURE_HANDOVER)) { iser_warn("T10-PI requested but not supported on %s, " "continue without T10-PI\n", - iser_conn->device->ib_device->name); - iser_conn->pi_support = false; + ib_conn->device->ib_device->name); + ib_conn->pi_support = false; } else { - iser_conn->pi_support = true; + ib_conn->pi_support = true; } } @@ -722,13 +724,14 @@ static void iser_route_handler(struct rdma_cm_id *cma_id) int ret; struct iser_cm_hdr req_hdr; struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; - struct iser_device *device = iser_conn->device; + struct ib_conn *ib_conn = &iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; - ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); + ret = iser_create_ib_conn_res(ib_conn); if (ret) goto failure; @@ -776,6 +779,7 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id) static void iser_disconnected_handler(struct rdma_cm_id *cma_id) { struct iser_conn *iser_conn; + struct ib_conn *ib_conn = &iser_conn->ib_conn; iser_conn = (struct iser_conn *)cma_id->context; @@ -793,8 +797,8 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id) * block also exists in iser_handle_comp_error(), but it is needed here * for cases of no flushes at all, e.g. discovery over rdma. 
*/ - if (iser_conn->post_recv_buf_count == 0 && - (atomic_read(&iser_conn->post_send_buf_count) == 0)) { + if (ib_conn->post_recv_buf_count == 0 && + (atomic_read(&ib_conn->post_send_buf_count) == 0)) { complete(&iser_conn->flush_completion); } } @@ -842,13 +846,13 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve void iser_conn_init(struct iser_conn *iser_conn) { iser_conn->state = ISER_CONN_INIT; - iser_conn->post_recv_buf_count = 0; - atomic_set(&iser_conn->post_send_buf_count, 0); + iser_conn->ib_conn.post_recv_buf_count = 0; + atomic_set(&iser_conn->ib_conn.post_send_buf_count, 0); init_completion(&iser_conn->stop_completion); init_completion(&iser_conn->flush_completion); init_completion(&iser_conn->up_completion); INIT_LIST_HEAD(&iser_conn->conn_list); - spin_lock_init(&iser_conn->lock); + spin_lock_init(&iser_conn->ib_conn.lock); mutex_init(&iser_conn->state_mutex); } @@ -861,6 +865,7 @@ int iser_connect(struct iser_conn *iser_conn, struct sockaddr *dst_addr, int non_blocking) { + struct ib_conn *ib_conn = &iser_conn->ib_conn; int err = 0; mutex_lock(&iser_conn->state_mutex); @@ -870,20 +875,20 @@ int iser_connect(struct iser_conn *iser_conn, iser_info("connecting to: %s\n", iser_conn->name); /* the device is known only --after-- address resolution */ - iser_conn->device = NULL; + ib_conn->device = NULL; iser_conn->state = ISER_CONN_PENDING; - iser_conn->cma_id = rdma_create_id(iser_cma_handler, - (void *)iser_conn, - RDMA_PS_TCP, IB_QPT_RC); - if (IS_ERR(iser_conn->cma_id)) { - err = PTR_ERR(iser_conn->cma_id); + ib_conn->cma_id = rdma_create_id(iser_cma_handler, + (void *)iser_conn, + RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(ib_conn->cma_id)) { + err = PTR_ERR(ib_conn->cma_id); iser_err("rdma_create_id failed: %d\n", err); goto id_failure; } - err = rdma_resolve_addr(iser_conn->cma_id, src_addr, dst_addr, 1000); + err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000); if (err) { iser_err("rdma_resolve_addr failed: %d\n", err); goto addr_failure; @@ -905,7 +910,7 @@ int iser_connect(struct iser_conn *iser_conn, return 0; id_failure: - iser_conn->cma_id = NULL; + ib_conn->cma_id = NULL; addr_failure: iser_conn->state = ISER_CONN_DOWN; connect_failure: @@ -919,7 +924,7 @@ connect_failure: * * returns: 0 on success, errno code on failure */ -int iser_reg_page_vec(struct iser_conn *iser_conn, +int iser_reg_page_vec(struct ib_conn *ib_conn, struct iser_page_vec *page_vec, struct iser_mem_reg *mem_reg) { @@ -931,7 +936,7 @@ int iser_reg_page_vec(struct iser_conn *iser_conn, page_list = page_vec->pages; io_addr = page_list[0]; - mem = ib_fmr_pool_map_phys(iser_conn->fmr.pool, + mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool, page_list, page_vec->length, io_addr); @@ -990,6 +995,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, { struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; struct iser_conn *iser_conn = iser_task->iser_conn; + struct ib_conn *ib_conn = &iser_conn->ib_conn; struct fast_reg_descriptor *desc = reg->mem_h; if (!reg->is_mr) @@ -997,31 +1003,32 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, reg->mem_h = NULL; reg->is_mr = 0; - spin_lock_bh(&iser_conn->lock); - list_add_tail(&desc->list, &iser_conn->fastreg.pool); - spin_unlock_bh(&iser_conn->lock); + spin_lock_bh(&ib_conn->lock); + list_add_tail(&desc->list, &ib_conn->fastreg.pool); + spin_unlock_bh(&ib_conn->lock); } int iser_post_recvl(struct iser_conn *iser_conn) { struct ib_recv_wr rx_wr, *rx_wr_failed; + struct ib_conn *ib_conn 
= &iser_conn->ib_conn; struct ib_sge sge; int ib_ret; sge.addr = iser_conn->login_resp_dma; sge.length = ISER_RX_LOGIN_SIZE; - sge.lkey = iser_conn->device->mr->lkey; + sge.lkey = ib_conn->device->mr->lkey; rx_wr.wr_id = (unsigned long)iser_conn->login_resp_buf; rx_wr.sg_list = &sge; rx_wr.num_sge = 1; rx_wr.next = NULL; - iser_conn->post_recv_buf_count++; - ib_ret = ib_post_recv(iser_conn->qp, &rx_wr, &rx_wr_failed); + ib_conn->post_recv_buf_count++; + ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); - iser_conn->post_recv_buf_count--; + ib_conn->post_recv_buf_count--; } return ib_ret; } @@ -1030,10 +1037,11 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) { struct ib_recv_wr *rx_wr, *rx_wr_failed; int i, ib_ret; + struct ib_conn *ib_conn = &iser_conn->ib_conn; unsigned int my_rx_head = iser_conn->rx_desc_head; struct iser_rx_desc *rx_desc; - for (rx_wr = iser_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { + for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { rx_desc = &iser_conn->rx_descs[my_rx_head]; rx_wr->wr_id = (unsigned long)rx_desc; rx_wr->sg_list = &rx_desc->rx_sg; @@ -1045,11 +1053,11 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) rx_wr--; rx_wr->next = NULL; /* mark end of work requests list */ - iser_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(iser_conn->qp, iser_conn->rx_wr, &rx_wr_failed); + ib_conn->post_recv_buf_count += count; + ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); - iser_conn->post_recv_buf_count -= count; + ib_conn->post_recv_buf_count -= count; } else iser_conn->rx_desc_head = my_rx_head; return ib_ret; @@ -1061,12 +1069,12 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) * * returns 0 on success, -1 on failure */ -int iser_post_send(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc) +int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc) { int ib_ret; struct ib_send_wr send_wr, *send_wr_failed; - ib_dma_sync_single_for_device(iser_conn->device->ib_device, + ib_dma_sync_single_for_device(ib_conn->device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -1077,24 +1085,27 @@ int iser_post_send(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc) send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - atomic_inc(&iser_conn->post_send_buf_count); + atomic_inc(&ib_conn->post_send_buf_count); - ib_ret = ib_post_send(iser_conn->qp, &send_wr, &send_wr_failed); + ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); if (ib_ret) { iser_err("ib_post_send failed, ret:%d\n", ib_ret); - atomic_dec(&iser_conn->post_send_buf_count); + atomic_dec(&ib_conn->post_send_buf_count); } return ib_ret; } static void iser_handle_comp_error(struct iser_tx_desc *desc, - struct iser_conn *iser_conn) + struct ib_conn *ib_conn) { + struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, + ib_conn); + if (desc && desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, desc); - if (iser_conn->post_recv_buf_count == 0 && - atomic_read(&iser_conn->post_send_buf_count) == 0) { + if (ib_conn->post_recv_buf_count == 0 && + atomic_read(&ib_conn->post_send_buf_count) == 0) { /** * getting here when the state is UP means that the conn is * being terminated asynchronously from the iSCSI layer's @@ -1116,15 +1127,15 @@ static int iser_drain_tx_cq(struct iser_device *device, int 
cq_index) struct ib_cq *cq = device->tx_cq[cq_index]; struct ib_wc wc; struct iser_tx_desc *tx_desc; - struct iser_conn *iser_conn; + struct ib_conn *ib_conn; int completed_tx = 0; while (ib_poll_cq(cq, 1, &wc) == 1) { tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id; - iser_conn = wc.qp->qp_context; + ib_conn = wc.qp->qp_context; if (wc.status == IB_WC_SUCCESS) { if (wc.opcode == IB_WC_SEND) - iser_snd_completion(tx_desc, iser_conn); + iser_snd_completion(tx_desc, ib_conn); else iser_err("expected opcode %d got %d\n", IB_WC_SEND, wc.opcode); @@ -1132,8 +1143,8 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) iser_err("tx id %llx status %d vend_err %x\n", wc.wr_id, wc.status, wc.vendor_err); if (wc.wr_id != ISER_FASTREG_LI_WRID) { - atomic_dec(&iser_conn->post_send_buf_count); - iser_handle_comp_error(tx_desc, iser_conn); + atomic_dec(&ib_conn->post_send_buf_count); + iser_handle_comp_error(tx_desc, ib_conn); } } completed_tx++; @@ -1151,7 +1162,7 @@ static void iser_cq_tasklet_fn(unsigned long data) struct ib_wc wc; struct iser_rx_desc *desc; unsigned long xfer_len; - struct iser_conn *iser_conn; + struct ib_conn *ib_conn; int completed_tx, completed_rx = 0; /* First do tx drain, so in a case where we have rx flushes and a successful @@ -1162,11 +1173,11 @@ static void iser_cq_tasklet_fn(unsigned long data) while (ib_poll_cq(cq, 1, &wc) == 1) { desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; BUG_ON(desc == NULL); - iser_conn = wc.qp->qp_context; + ib_conn = wc.qp->qp_context; if (wc.status == IB_WC_SUCCESS) { if (wc.opcode == IB_WC_RECV) { xfer_len = (unsigned long)wc.byte_len; - iser_rcv_completion(desc, xfer_len, iser_conn); + iser_rcv_completion(desc, xfer_len, ib_conn); } else iser_err("expected opcode %d got %d\n", IB_WC_RECV, wc.opcode); @@ -1174,8 +1185,8 @@ static void iser_cq_tasklet_fn(unsigned long data) if (wc.status != IB_WC_WR_FLUSH_ERR) iser_err("rx id %llx status %d vend_err %x\n", wc.wr_id, wc.status, wc.vendor_err); - iser_conn->post_recv_buf_count--; - iser_handle_comp_error(NULL, iser_conn); + ib_conn->post_recv_buf_count--; + iser_handle_comp_error(NULL, ib_conn); } completed_rx++; if (!(completed_rx & 63)) -- cgit v1.2.3 From 6bb0279f95dc74082184d91b65743b295a2a11a6 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 1 Oct 2014 14:01:59 +0300 Subject: IB/iser: Remove unused variables and dead code Signed-off-by: Roi Dayan Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 4ad73c91e531..ec238b3bd278 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -365,7 +365,6 @@ struct iser_conn { struct iscsi_conn *iscsi_conn; struct iscsi_endpoint *ep; enum iser_conn_state state; /* rdma connection state */ - atomic_t refcount; unsigned qp_max_recv_dtos; /* num of rx buffers */ unsigned qp_max_recv_dtos_mask; /* above minus 1 */ unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ @@ -424,9 +423,6 @@ extern int iser_debug_level; extern bool iser_pi_enable; extern int iser_pi_guard; -/* allocate connection resources needed for rdma functionality */ -int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); - int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task); -- cgit v1.2.3 From 96f15198c1457df29b51ed151b1e5b2a223d1346 
Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:00 +0300 Subject: IB/iser: Extend iser_free_ib_conn_res() Put the release of all connection IB-related resources in this routine. One exception is the cm_id, which cannot be destroyed as the routine is protected by the state mutex. Also move its position to avoid forward declaration. While at it, fix the qp NULL assignment. Signed-off-by: Sagi Grimberg Signed-off-by: Ariel Nahum Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 58 +++++++++++++++++--------------- 1 file changed, 30 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index e69aba8eabec..e4299743c459 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -495,27 +495,6 @@ out_err: return ret; } -/** - * releases the QP object - */ -static void iser_free_ib_conn_res(struct iser_conn *iser_conn) -{ - struct ib_conn *ib_conn = &iser_conn->ib_conn; - - iser_info("freeing conn %p cma_id %p qp %p\n", - ib_conn, ib_conn->cma_id, - ib_conn->qp); - - /* qp is created only once both addr & route are resolved */ - - if (ib_conn->qp != NULL) { - ib_conn->device->cq_active_qps[ib_conn->cq_index]--; - rdma_destroy_qp(ib_conn->cma_id); - } - - ib_conn->qp = NULL; -} - /** * based on the resolved device node GUID see if there already allocated * device for this device. If there's no such, create one. @@ -607,13 +586,42 @@ void iser_release_work(struct work_struct *work) iser_conn_release(iser_conn); } +/** + * iser_free_ib_conn_res - release IB related resources + * @iser_conn: iser connection struct + * + * This routine is called with the iser state mutex held + * so the cm_id removal is out of here. It is safe to + * be invoked multiple times. + */ +static void iser_free_ib_conn_res(struct iser_conn *iser_conn) +{ + struct ib_conn *ib_conn = &iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; + + iser_info("freeing conn %p cma_id %p qp %p\n", + iser_conn, ib_conn->cma_id, ib_conn->qp); + + iser_free_rx_descriptors(iser_conn); + + if (ib_conn->qp != NULL) { + ib_conn->device->cq_active_qps[ib_conn->cq_index]--; + rdma_destroy_qp(ib_conn->cma_id); + ib_conn->qp = NULL; + } + + if (device != NULL) { + iser_device_try_release(device); + ib_conn->device = NULL; + } +} + /** * Frees all conn objects and deallocs conn descriptor */ void iser_conn_release(struct iser_conn *iser_conn) { struct ib_conn *ib_conn = &iser_conn->ib_conn; - struct iser_device *device = ib_conn->device; mutex_lock(&ig.connlist_mutex); list_del(&iser_conn->conn_list); @@ -621,13 +629,7 @@ void iser_conn_release(struct iser_conn *iser_conn) mutex_lock(&iser_conn->state_mutex); BUG_ON(iser_conn->state != ISER_CONN_DOWN); - - iser_free_rx_descriptors(iser_conn); iser_free_ib_conn_res(iser_conn); - ib_conn->device = NULL; - /* on EVENT_ADDR_ERROR there's no device yet for this conn */ - if (device != NULL) - iser_device_try_release(device); mutex_unlock(&iser_conn->state_mutex); if (ib_conn->cma_id != NULL) { -- cgit v1.2.3 From c47a3c9ed5be167f49a6fd3f696dac03536282eb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:01 +0300 Subject: IB/iser: Fix DEVICE REMOVAL handling in the absence of iscsi daemon The iscsi daemon is in user-space, thus we can't rely on it to be invoked at connection teardown (if it is not running or does not receive CPU time).
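As a schematic preview of the event split described next (an illustrative user-space sketch under stated assumptions: event names mirror the rdma_cm events handled below, the helper bodies are stubs, and cma_handler_sketch only stands in for the real iser_cma_handler):

#include <stdio.h>

enum cm_event {
	EV_DISCONNECTED,
	EV_ADDR_CHANGED,
	EV_TIMEWAIT_EXIT,
	EV_DEVICE_REMOVAL,
};

static void start_teardown(void)
{
	/* rdma_disconnect + notify iSCSI + state -> TERMINATING,
	 * then poll until all flush errors are consumed */
	puts("teardown started");
}

static void cleanup_rdma_res(void)
{
	/* destroy the QP and free the RDMA resources (ib_conn) */
	puts("rdma resources freed");
}

/* A non-zero return tells the CMA to destroy the cm_id implicitly,
 * so forward progress never depends on the user-space daemon. */
static int cma_handler_sketch(enum cm_event ev)
{
	switch (ev) {
	case EV_DISCONNECTED:
	case EV_ADDR_CHANGED:
		start_teardown();	/* first phase only */
		return 0;
	case EV_TIMEWAIT_EXIT:
		start_teardown();	/* may not have run yet */
		cleanup_rdma_res();
		return 0;
	case EV_DEVICE_REMOVAL:
		start_teardown();
		cleanup_rdma_res();
		return 1;		/* implicit cm_id destruction */
	}
	return 0;
}

int main(void)
{
	return cma_handler_sketch(EV_DEVICE_REMOVAL) == 1 ? 0 : 1;
}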
This patch addresses the issue by re-structuring iSER connection teardown logic and CM events handling. The CM events will dictate the RDMA resources destruction (ib_conn), and iser_conn is kept around as long as iscsi_conn is left around, allowing iscsi/iser callbacks to continue after the RDMA transport was destroyed. This patch introduces a separation in logic when handling CM events: - DISCONNECTED_HANDLER, ADDR_CHANGED These events indicate the start of the teardown process. Actions: 1. Terminate the connection: rdma_disconnect (send DREQ/DREP) 2. Notify iSCSI of connection failure 3. Change state to TERMINATING 4. Poll for all flush errors to be consumed - TIMEWAIT_EXIT, DEVICE_REMOVAL These events indicate the final stage of the termination process, where we can free RDMA related resources. Actions: 1. Call disconnected handler (we are not guaranteed that a DISCONNECTED event was invoked in the past) 2. Clean up RDMA related resources 3. For DEVICE_REMOVAL return non-zero rc from cma_handler to implicitly destroy the cm_id (can't rely on user-space; make sure we have forward progress) We replace flush_completion (indicating all flushes were consumed) with ib_completion (RDMA resources were cleaned up). The iser_conn_release_work will wait for teardown completions: - conn_stop was completed (tasks were cleaned up) - stop_completion - RDMA resources were destroyed - ib_completion It will then continue to free the iser connection representation (iser_conn). Signed-off-by: Sagi Grimberg Signed-off-by: Ariel Nahum Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 6 +- drivers/infiniband/ulp/iser/iser_verbs.c | 163 ++++++++++++++++++++----------- 2 files changed, 108 insertions(+), 61 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index ec238b3bd278..95c484d0f881 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -370,9 +370,9 @@ struct iser_conn { unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ char name[ISER_OBJECT_NAME_SIZE]; struct work_struct release_work; - struct completion stop_completion; struct mutex state_mutex; - struct completion flush_completion; + struct completion stop_completion; + struct completion ib_completion; struct completion up_completion; struct list_head conn_list; /* entry in ig conn list */ @@ -442,7 +442,7 @@ void iser_conn_init(struct iser_conn *iser_conn); void iser_conn_release(struct iser_conn *iser_conn); -void iser_conn_terminate(struct iser_conn *iser_conn); +int iser_conn_terminate(struct iser_conn *iser_conn); void iser_release_work(struct work_struct *work); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index e4299743c459..6170d06a8acc 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -44,6 +44,7 @@ static void iser_cq_tasklet_fn(unsigned long data); static void iser_cq_callback(struct ib_cq *cq, void *cq_context); +static int iser_drain_tx_cq(struct iser_device *device, int cq_index); static void iser_cq_event_callback(struct ib_event *cause, void *context) { @@ -573,11 +574,10 @@ void iser_release_work(struct work_struct *work) rc = wait_for_completion_timeout(&iser_conn->stop_completion, 30 * HZ); WARN_ON(rc == 0); - /* wait for the qp`s post send and post receive buffers to empty */ - rc = wait_for_completion_timeout(&iser_conn->flush_completion, 30 *
HZ); - WARN_ON(rc == 0); - - iser_conn->state = ISER_CONN_DOWN; + rc = wait_for_completion_timeout(&iser_conn->ib_completion, 30 * HZ); + if (rc == 0) + iser_warn("conn %p, IB cleanup didn't complete in 30 " + "seconds, continue with release\n", iser_conn); mutex_lock(&iser_conn->state_mutex); iser_conn->state = ISER_CONN_DOWN; @@ -589,12 +589,16 @@ /** * iser_free_ib_conn_res - release IB related resources * @iser_conn: iser connection struct + * @destroy_device: indicator if we need to try to release + * the iser device (only iscsi shutdown and DEVICE_REMOVAL + * will use this). * * This routine is called with the iser state mutex held * so the cm_id removal is out of here. It is safe to * be invoked multiple times. */ -static void iser_free_ib_conn_res(struct iser_conn *iser_conn) +static void iser_free_ib_conn_res(struct iser_conn *iser_conn, + bool destroy_device) { struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_device *device = ib_conn->device; @@ -610,7 +614,7 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn) ib_conn->qp = NULL; } - if (device != NULL) { + if (destroy_device && device != NULL) { iser_device_try_release(device); ib_conn->device = NULL; } @@ -629,7 +633,11 @@ void iser_conn_release(struct iser_conn *iser_conn) mutex_lock(&iser_conn->state_mutex); BUG_ON(iser_conn->state != ISER_CONN_DOWN); - iser_free_ib_conn_res(iser_conn); + /* + * In case we never got to bind stage, we still need to + * release IB resources (which is safe to call more than once). + */ + iser_free_ib_conn_res(iser_conn, true); mutex_unlock(&iser_conn->state_mutex); if (ib_conn->cma_id != NULL) { @@ -640,24 +648,69 @@ void iser_conn_release(struct iser_conn *iser_conn) kfree(iser_conn); } +/** + * iser_poll_for_flush_errors - Don't settle for less than all. + * @struct ib_conn: IB context of the connection + * + * This routine is called when the QP is in error state + * It polls the send CQ until all flush errors are consumed and + * returns when all flush errors were processed. 
+ */ +static void iser_poll_for_flush_errors(struct ib_conn *ib_conn) +{ + struct iser_device *device = ib_conn->device; + int count = 0; + + while (ib_conn->post_recv_buf_count > 0 || + atomic_read(&ib_conn->post_send_buf_count) > 0) { + msleep(100); + if (atomic_read(&ib_conn->post_send_buf_count) > 0) + iser_drain_tx_cq(device, ib_conn->cq_index); + + count++; + /* Don't flood with prints */ + if (count % 30 == 0) + iser_dbg("post_recv %d post_send %d", + ib_conn->post_recv_buf_count, + atomic_read(&ib_conn->post_send_buf_count)); + } +} + /** * triggers start of the disconnect procedures and wait for them to be done + * Called with state mutex held */ -void iser_conn_terminate(struct iser_conn *iser_conn) +int iser_conn_terminate(struct iser_conn *iser_conn) { struct ib_conn *ib_conn = &iser_conn->ib_conn; int err = 0; - /* change the ib conn state only if the conn is UP, however always call - * rdma_disconnect since this is the only way to cause the CMA to change - * the QP state to ERROR + /* terminate the iser conn only if the conn state is UP */ + if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, + ISER_CONN_TERMINATING)) + return 0; + + iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state); + + /* suspend queuing of new iscsi commands */ + if (iser_conn->iscsi_conn) + iscsi_suspend_queue(iser_conn->iscsi_conn); + + /* + * In case we didn't already clean up the cma_id (peer initiated + * a disconnection), we need to Cause the CMA to change the QP + * state to ERROR. */ + if (ib_conn->cma_id) { + err = rdma_disconnect(ib_conn->cma_id); + if (err) + iser_err("Failed to disconnect, conn: 0x%p err %d\n", + iser_conn, err); + + iser_poll_for_flush_errors(ib_conn); + } - iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); - err = rdma_disconnect(ib_conn->cma_id); - if (err) - iser_err("Failed to disconnect, conn: 0x%p err %d\n", - iser_conn, err); + return 1; } /** @@ -780,34 +833,36 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id) static void iser_disconnected_handler(struct rdma_cm_id *cma_id) { - struct iser_conn *iser_conn; - struct ib_conn *ib_conn = &iser_conn->ib_conn; - - iser_conn = (struct iser_conn *)cma_id->context; + struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; - /* getting here when the state is UP means that the conn is being * - * terminated asynchronously from the iSCSI layer's perspective. */ - if (iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, - ISER_CONN_TERMINATING)){ + if (iser_conn_terminate(iser_conn)) { if (iser_conn->iscsi_conn) - iscsi_conn_failure(iser_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); + iscsi_conn_failure(iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); else iser_err("iscsi_iser connection isn't bound\n"); } +} + +static void iser_cleanup_handler(struct rdma_cm_id *cma_id, + bool destroy_device) +{ + struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; - /* Complete the termination process if no posts are pending. This code - * block also exists in iser_handle_comp_error(), but it is needed here - * for cases of no flushes at all, e.g. discovery over rdma. + /* + * We are not guaranteed that we visited disconnected_handler + * by now, call it here to be safe that we handle CM drep + * and flush errors. 
*/ - if (ib_conn->post_recv_buf_count == 0 && - (atomic_read(&ib_conn->post_send_buf_count) == 0)) { - complete(&iser_conn->flush_completion); - } -} + iser_disconnected_handler(cma_id); + iser_free_ib_conn_res(iser_conn, destroy_device); + complete(&iser_conn->ib_completion); +}; static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { struct iser_conn *iser_conn; + int ret = 0; iser_conn = (struct iser_conn *)cma_id->context; iser_info("event %d status %d conn %p id %p\n", @@ -832,17 +887,29 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve iser_connect_error(cma_id); break; case RDMA_CM_EVENT_DISCONNECTED: - case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_ADDR_CHANGE: - case RDMA_CM_EVENT_TIMEWAIT_EXIT: iser_disconnected_handler(cma_id); break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + /* + * we *must* destroy the device as we cannot rely + * on iscsid to be around to initiate error handling. + * also implicitly destroy the cma_id. + */ + iser_cleanup_handler(cma_id, true); + iser_conn->ib_conn.cma_id = NULL; + ret = 1; + break; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + iser_cleanup_handler(cma_id, false); + break; default: iser_err("Unexpected RDMA CM event (%d)\n", event->event); break; } mutex_unlock(&iser_conn->state_mutex); - return 0; + + return ret; } void iser_conn_init(struct iser_conn *iser_conn) @@ -851,7 +918,7 @@ void iser_conn_init(struct iser_conn *iser_conn) iser_conn->ib_conn.post_recv_buf_count = 0; atomic_set(&iser_conn->ib_conn.post_send_buf_count, 0); init_completion(&iser_conn->stop_completion); - init_completion(&iser_conn->flush_completion); + init_completion(&iser_conn->ib_completion); init_completion(&iser_conn->up_completion); INIT_LIST_HEAD(&iser_conn->conn_list); spin_lock_init(&iser_conn->ib_conn.lock); @@ -1100,28 +1167,8 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc) static void iser_handle_comp_error(struct iser_tx_desc *desc, struct ib_conn *ib_conn) { - struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, - ib_conn); - if (desc && desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, desc); - - if (ib_conn->post_recv_buf_count == 0 && - atomic_read(&ib_conn->post_send_buf_count) == 0) { - /** - * getting here when the state is UP means that the conn is - * being terminated asynchronously from the iSCSI layer's - * perspective. It is safe to peek at the connection state - * since iscsi_conn_failure is allowed to be called twice. - **/ - if (iser_conn->state == ISER_CONN_UP) - iscsi_conn_failure(iser_conn->iscsi_conn, - ISCSI_ERR_CONN_FAILED); - - /* no more non completed posts to the QP, complete the - * termination process w.o worrying on disconnect event */ - complete(&iser_conn->flush_completion); - } } static int iser_drain_tx_cq(struct iser_device *device, int cq_index) -- cgit v1.2.3 From c107a6c0cf1ab883ea87ca50136f4fc5204a4b82 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:02 +0300 Subject: IB/iser: Don't bound release_work completions timeouts We no longer rely on iscsi connection teardown sequence, so no need to give a grace period and continue cleanup if it expired. Have iser_conn_release wait for full completion before freeing iser_conn. 
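As a rough sketch (using the completion fields this series introduces, with a hypothetical function name), the release path now reduces to two unbounded waits followed by the final free; the guarantee lists below spell out why waiting without a timeout is safe:

static void release_work_sketch(struct work_struct *work)
{
	struct iser_conn *iser_conn = container_of(work, struct iser_conn,
						   release_work);

	/* all in-flight tasks were cleaned up (.conn_stop completed) */
	wait_for_completion(&iser_conn->stop_completion);
	/* all RDMA resources were destroyed (flush errors consumed) */
	wait_for_completion(&iser_conn->ib_completion);
	/* only now is it safe to free the connection representation */
	iser_conn_release(iser_conn);
}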
ib_completion: Guaranteed to come when: - Got DISCONNECTED/ADDR_CHANGE event or - iSCSI called ep_disconnect/conn_stop Guaranteed to finish when: - Got TIMEWAIT_EXIT/DEVICE_REMOVAL event - All Flush errors are consumed - IB related resources are destroyed stop_completion: Guaranteed to come when: - iSCSI calls conn_stop Guaranteed to finish when: - All inflight tasks were cleaned up Signed-off-by: Sagi Grimberg Signed-off-by: Ariel Nahum Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 6170d06a8acc..6ce20fd9abac 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -566,18 +566,13 @@ static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, void iser_release_work(struct work_struct *work) { struct iser_conn *iser_conn; - int rc; iser_conn = container_of(work, struct iser_conn, release_work); - /* wait for .conn_stop callback */ - rc = wait_for_completion_timeout(&iser_conn->stop_completion, 30 * HZ); - WARN_ON(rc == 0); - - rc = wait_for_completion_timeout(&iser_conn->ib_completion, 30 * HZ); - if (rc == 0) - iser_warn("conn %p, IB cleanup didn't complete in 30 " - "seconds, continue with release\n", iser_conn); + /* Wait for conn_stop to complete */ + wait_for_completion(&iser_conn->stop_completion); + /* Wait for IB resources cleanup to complete */ + wait_for_completion(&iser_conn->ib_completion); mutex_lock(&iser_conn->state_mutex); iser_conn->state = ISER_CONN_DOWN; -- cgit v1.2.3 From ec370e2b63526931a65f4668626dbb43896788c6 Mon Sep 17 00:00:00 2001 From: Ariel Nahum Date: Wed, 1 Oct 2014 14:02:03 +0300 Subject: IB/iser: Unbind at conn_stop stage Previously we didn't need to unbind the iser_conn and iscsi_conn since we always relied on the iscsi daemon to tear down the connection and never let it finish before we clean up all that is needed in iser. This is not the case anymore (for the DEVICE_REMOVAL event). So avoid any possible chance of an iscsi_conn dereference after iscsi_conn was freed. We also call iser_conn_terminate (safe to call multiple times) just for the corner case of the iscsi daemon stopping an old connection before invoking endpoint removal (might happen if it was violently killed). Notice we are unbinding under a lock - which is required. Signed-off-by: Ariel Nahum Signed-off-by: Sagi Grimberg Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index db83530184f8..7298e696c6cf 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -414,8 +414,15 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) * might have only partially setup the connection. 
 */ if (iser_conn) { + mutex_lock(&iser_conn->state_mutex); + iser_conn_terminate(iser_conn); + + /* unbind */ + iser_conn->iscsi_conn = NULL; conn->dd_data = NULL; + complete(&iser_conn->stop_completion); + mutex_unlock(&iser_conn->state_mutex); } } -- cgit v1.2.3 From 3a940daf6fa105d28b69cf3b7a3739a3777f4185 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:04 +0300 Subject: IB/iser: Protect tasks cleanup in case IB device was already released Bail out in case a task cleanup (iscsi_iser_cleanup_task) is called after the IB device was removed (DEVICE_REMOVAL CM event). We also call iscsi_conn_stop with a lock taken to prevent DEVICE_REMOVAL and tasks cleanup from racing. Signed-off-by: Sagi Grimberg Signed-off-by: Ariel Nahum Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 7298e696c6cf..81d69a30bcca 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -293,6 +293,10 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) struct iser_conn *iser_conn = task->conn->dd_data; struct iser_device *device = iser_conn->ib_conn.device; + /* DEVICE_REMOVAL event might have already released the device */ + if (!device) + return; + ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -407,7 +411,6 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) struct iser_conn *iser_conn = conn->dd_data; iser_dbg("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn); - iscsi_conn_stop(cls_conn, flag); /* * Userspace may have goofed up and not bound the connection or @@ -415,6 +418,7 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) */ if (iser_conn) { mutex_lock(&iser_conn->state_mutex); + iscsi_conn_stop(cls_conn, flag); iser_conn_terminate(iser_conn); /* unbind */ @@ -423,6 +427,8 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) complete(&iser_conn->stop_completion); mutex_unlock(&iser_conn->state_mutex); + } else { + iscsi_conn_stop(cls_conn, flag); } } -- cgit v1.2.3 From 8c204e69ced1a8c0d74f8b6d7a1393d055c5c4fa Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:05 +0300 Subject: IB/iser: Signal iSCSI layer that transport is broken in error completions Previously we notified the iscsi layer about the connection failure when we consumed all of our flush errors. This was racy, as there was no guarantee that iscsi_conn wasn't terminated by then (which ends up in an invalid memory access). In case we got a non-FLUSH error completion, we are guaranteed that iscsi_conn is still alive. We should notify the iSCSI layer with iscsi_conn_failure to initiate error handling. While we are at it, add kernel-doc style documentation. 
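The rule this encodes can be sketched as follows (hypothetical helper name; the real logic lands in iser_handle_comp_error() in the diff below). A FLUSH error only says the QP is already draining, so it proves nothing about the iscsi_conn lifetime; any other error completion implies the connection is still bound and alive, so it is safe to escalate:

static void comp_error_sketch(struct iser_conn *iser_conn, struct ib_wc *wc)
{
	if (wc->status == IB_WC_WR_FLUSH_ERR)
		return;	/* teardown flush; iscsi_conn may already be gone */

	/* non-FLUSH error: iscsi_conn is guaranteed alive, notify iSCSI */
	if (iser_conn->iscsi_conn)
		iscsi_conn_failure(iser_conn->iscsi_conn,
				   ISCSI_ERR_CONN_FAILED);
}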
Signed-off-by: Sagi Grimberg Signed-off-by: Ariel Nahum Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 6ce20fd9abac..35f53a3fdf21 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1159,9 +1159,30 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc) return ib_ret; } -static void iser_handle_comp_error(struct iser_tx_desc *desc, - struct ib_conn *ib_conn) +/** + * iser_handle_comp_error() - Handle error completion + * @desc: iser TX descriptor + * @ib_conn: connection RDMA resources + * @wc: work completion + * + * Notes: We may handle a FLUSH error completion and in this case + * we only cleanup in case TX type was DATAOUT. For non-FLUSH + * error completion we should also notify iscsi layer that + * connection is failed (in case we passed bind stage). + */ +static void +iser_handle_comp_error(struct iser_tx_desc *desc, + struct ib_conn *ib_conn, + struct ib_wc *wc) { + struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, + ib_conn); + + if (wc->status != IB_WC_WR_FLUSH_ERR) + if (iser_conn->iscsi_conn) + iscsi_conn_failure(iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); + if (desc && desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, desc); } @@ -1188,7 +1209,7 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) wc.wr_id, wc.status, wc.vendor_err); if (wc.wr_id != ISER_FASTREG_LI_WRID) { atomic_dec(&ib_conn->post_send_buf_count); - iser_handle_comp_error(tx_desc, ib_conn); + iser_handle_comp_error(tx_desc, ib_conn, &wc); } } completed_tx++; @@ -1230,7 +1251,7 @@ static void iser_cq_tasklet_fn(unsigned long data) iser_err("rx id %llx status %d vend_err %x\n", wc.wr_id, wc.status, wc.vendor_err); ib_conn->post_recv_buf_count--; - iser_handle_comp_error(NULL, ib_conn); + iser_handle_comp_error(NULL, ib_conn, &wc); } completed_rx++; if (!(completed_rx & 63)) -- cgit v1.2.3 From aea8f4df6da46add468c44875348e1045bffeeb7 Mon Sep 17 00:00:00 2001 From: Ariel Nahum Date: Wed, 1 Oct 2014 14:02:06 +0300 Subject: IB/iser: Use iser_warn instead of BUG_ON in iser_conn_release In case iscsid was violently killed (SIGKILL) during its error recovery stage, we may never get a connection teardown sequence for some of the old connections. No harm done, but when we try to unload the module we will need to cleanup all these connections. So we actually may end-up here - so it's not a BUG_ON(), just give a relaxed warning that this happened and continue with normal unload. BUG_ON() will cause segfault on module_exit and we don't want that. 
Signed-off-by: Ariel Nahum Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 35f53a3fdf21..57b20c691367 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -627,7 +627,9 @@ void iser_conn_release(struct iser_conn *iser_conn) mutex_unlock(&ig.connlist_mutex); mutex_lock(&iser_conn->state_mutex); - BUG_ON(iser_conn->state != ISER_CONN_DOWN); + if (iser_conn->state != ISER_CONN_DOWN) + iser_warn("iser conn %p state %d, expected state down.\n", + iser_conn, iser_conn->state); /* * In case we never got to bind stage, we still need to * release IB resources (which is safe to call more than once). -- cgit v1.2.3 From bf17554035ab2aaf770321208ce48e69aab71cc8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:07 +0300 Subject: IB/iser: Centralize iser completion contexts Introduce iser_comp which centralizes all iser completion related items and is referenced by iser_device and each ib_conn. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 35 +++++--- drivers/infiniband/ulp/iser/iser_verbs.c | 136 ++++++++++++++----------------- 2 files changed, 84 insertions(+), 87 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 95c484d0f881..2bc34aa50705 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -213,7 +213,6 @@ struct iser_data_buf { /* fwd declarations */ struct iser_device; -struct iser_cq_desc; struct iscsi_iser_task; struct iscsi_endpoint; @@ -268,20 +267,34 @@ struct iser_conn; struct ib_conn; struct iscsi_iser_task; +/** + * struct iser_comp - iSER completion context + * + * @device: pointer to device handle + * @rx_cq: RX completion queue + * @tx_cq: TX completion queue + * @tasklet: Tasklet handle + * @active_qps: Number of active QPs attached + * to completion context + */ +struct iser_comp { + struct iser_device *device; + struct ib_cq *rx_cq; + struct ib_cq *tx_cq; + struct tasklet_struct tasklet; + int active_qps; +}; + struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; struct ib_device_attr dev_attr; - struct ib_cq *rx_cq[ISER_MAX_CQ]; - struct ib_cq *tx_cq[ISER_MAX_CQ]; struct ib_mr *mr; - struct tasklet_struct cq_tasklet[ISER_MAX_CQ]; struct ib_event_handler event_handler; struct list_head ig_list; /* entry in ig devices list */ int refcount; - int cq_active_qps[ISER_MAX_CQ]; - int cqs_used; - struct iser_cq_desc *cq_desc; + int comps_used; + struct iser_comp comps[ISER_MAX_CQ]; int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn, unsigned cmds_max); void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn); @@ -327,6 +340,7 @@ struct fast_reg_descriptor { * @post_send_buf_count: post send counter * @rx_wr: receive work request for batch posts * @device: reference to iser device + * @comp: iser completion context * @pi_support: Indicate device T10-PI support * @lock: protects fmr/fastreg pool * @union.fmr: @@ -345,7 +359,7 @@ struct ib_conn { atomic_t post_send_buf_count; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; - int cq_index; + struct iser_comp *comp; bool pi_support; spinlock_t lock; union 
{ @@ -404,11 +418,6 @@ struct iser_page_vec { int data_size; }; -struct iser_cq_desc { - struct iser_device *device; - int cq_index; -}; - struct iser_global { struct mutex device_list_mutex;/* */ struct list_head device_list; /* all iSER devices */ diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 57b20c691367..94d1b46b467a 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -44,7 +44,7 @@ static void iser_cq_tasklet_fn(unsigned long data); static void iser_cq_callback(struct ib_cq *cq, void *cq_context); -static int iser_drain_tx_cq(struct iser_device *device, int cq_index); +static int iser_drain_tx_cq(struct iser_comp *comp); static void iser_cq_event_callback(struct ib_event *cause, void *context) { @@ -72,7 +72,6 @@ static void iser_event_handler(struct ib_event_handler *handler, */ static int iser_create_device_ib_res(struct iser_device *device) { - struct iser_cq_desc *cq_desc; struct ib_device_attr *dev_attr = &device->dev_attr; int ret, i; @@ -102,51 +101,44 @@ static int iser_create_device_ib_res(struct iser_device *device) return -1; } - device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); + device->comps_used = min(ISER_MAX_CQ, + device->ib_device->num_comp_vectors); iser_info("using %d CQs, device %s supports %d vectors\n", - device->cqs_used, device->ib_device->name, + device->comps_used, device->ib_device->name, device->ib_device->num_comp_vectors); - device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used, - GFP_KERNEL); - if (device->cq_desc == NULL) - goto cq_desc_err; - cq_desc = device->cq_desc; - device->pd = ib_alloc_pd(device->ib_device); if (IS_ERR(device->pd)) goto pd_err; - for (i = 0; i < device->cqs_used; i++) { - cq_desc[i].device = device; - cq_desc[i].cq_index = i; - - device->rx_cq[i] = ib_create_cq(device->ib_device, - iser_cq_callback, - iser_cq_event_callback, - (void *)&cq_desc[i], - ISER_MAX_RX_CQ_LEN, i); - if (IS_ERR(device->rx_cq[i])) { - device->rx_cq[i] = NULL; + for (i = 0; i < device->comps_used; i++) { + struct iser_comp *comp = &device->comps[i]; + + comp->device = device; + comp->rx_cq = ib_create_cq(device->ib_device, + iser_cq_callback, + iser_cq_event_callback, + (void *)comp, + ISER_MAX_RX_CQ_LEN, i); + if (IS_ERR(comp->rx_cq)) { + comp->rx_cq = NULL; goto cq_err; } - device->tx_cq[i] = ib_create_cq(device->ib_device, - NULL, iser_cq_event_callback, - (void *)&cq_desc[i], - ISER_MAX_TX_CQ_LEN, i); - - if (IS_ERR(device->tx_cq[i])) { - device->tx_cq[i] = NULL; + comp->tx_cq = ib_create_cq(device->ib_device, NULL, + iser_cq_event_callback, + (void *)comp, + ISER_MAX_TX_CQ_LEN, i); + if (IS_ERR(comp->tx_cq)) { + comp->tx_cq = NULL; goto cq_err; } - if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP)) + if (ib_req_notify_cq(comp->rx_cq, IB_CQ_NEXT_COMP)) goto cq_err; - tasklet_init(&device->cq_tasklet[i], - iser_cq_tasklet_fn, - (unsigned long)&cq_desc[i]); + tasklet_init(&comp->tasklet, iser_cq_tasklet_fn, + (unsigned long)comp); } device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | @@ -165,19 +157,19 @@ static int iser_create_device_ib_res(struct iser_device *device) handler_err: ib_dereg_mr(device->mr); dma_mr_err: - for (i = 0; i < device->cqs_used; i++) - tasklet_kill(&device->cq_tasklet[i]); + for (i = 0; i < device->comps_used; i++) + tasklet_kill(&device->comps[i].tasklet); cq_err: - for (i = 0; i < device->cqs_used; i++) { - if (device->tx_cq[i]) - 
ib_destroy_cq(device->tx_cq[i]); - if (device->rx_cq[i]) - ib_destroy_cq(device->rx_cq[i]); + for (i = 0; i < device->comps_used; i++) { + struct iser_comp *comp = &device->comps[i]; + + if (comp->tx_cq) + ib_destroy_cq(comp->tx_cq); + if (comp->rx_cq) + ib_destroy_cq(comp->rx_cq); } ib_dealloc_pd(device->pd); pd_err: - kfree(device->cq_desc); -cq_desc_err: iser_err("failed to allocate an IB resource\n"); return -1; } @@ -191,20 +183,20 @@ static void iser_free_device_ib_res(struct iser_device *device) int i; BUG_ON(device->mr == NULL); - for (i = 0; i < device->cqs_used; i++) { - tasklet_kill(&device->cq_tasklet[i]); - (void)ib_destroy_cq(device->tx_cq[i]); - (void)ib_destroy_cq(device->rx_cq[i]); - device->tx_cq[i] = NULL; - device->rx_cq[i] = NULL; + for (i = 0; i < device->comps_used; i++) { + struct iser_comp *comp = &device->comps[i]; + + tasklet_kill(&comp->tasklet); + ib_destroy_cq(comp->tx_cq); + ib_destroy_cq(comp->rx_cq); + comp->tx_cq = NULL; + comp->rx_cq = NULL; } (void)ib_unregister_event_handler(&device->event_handler); (void)ib_dereg_mr(device->mr); (void)ib_dealloc_pd(device->pd); - kfree(device->cq_desc); - device->mr = NULL; device->pd = NULL; } @@ -456,19 +448,20 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) mutex_lock(&ig.connlist_mutex); /* select the CQ with the minimal number of usages */ - for (index = 0; index < device->cqs_used; index++) - if (device->cq_active_qps[index] < - device->cq_active_qps[min_index]) + for (index = 0; index < device->comps_used; index++) { + if (device->comps[index].active_qps < + device->comps[min_index].active_qps) min_index = index; - device->cq_active_qps[min_index]++; - ib_conn->cq_index = min_index; + } + ib_conn->comp = &device->comps[min_index]; + ib_conn->comp->active_qps++; mutex_unlock(&ig.connlist_mutex); iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); init_attr.event_handler = iser_qp_event_callback; init_attr.qp_context = (void *)ib_conn; - init_attr.send_cq = device->tx_cq[min_index]; - init_attr.recv_cq = device->rx_cq[min_index]; + init_attr.send_cq = ib_conn->comp->tx_cq; + init_attr.recv_cq = ib_conn->comp->rx_cq; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; init_attr.cap.max_send_sge = 2; init_attr.cap.max_recv_sge = 1; @@ -604,7 +597,7 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn, iser_free_rx_descriptors(iser_conn); if (ib_conn->qp != NULL) { - ib_conn->device->cq_active_qps[ib_conn->cq_index]--; + ib_conn->comp->active_qps--; rdma_destroy_qp(ib_conn->cma_id); ib_conn->qp = NULL; } @@ -655,14 +648,13 @@ void iser_conn_release(struct iser_conn *iser_conn) */ static void iser_poll_for_flush_errors(struct ib_conn *ib_conn) { - struct iser_device *device = ib_conn->device; int count = 0; while (ib_conn->post_recv_buf_count > 0 || atomic_read(&ib_conn->post_send_buf_count) > 0) { msleep(100); if (atomic_read(&ib_conn->post_send_buf_count) > 0) - iser_drain_tx_cq(device, ib_conn->cq_index); + iser_drain_tx_cq(ib_conn->comp); count++; /* Don't flood with prints */ @@ -1189,9 +1181,9 @@ iser_handle_comp_error(struct iser_tx_desc *desc, kmem_cache_free(ig.desc_cache, desc); } -static int iser_drain_tx_cq(struct iser_device *device, int cq_index) +static int iser_drain_tx_cq(struct iser_comp *comp) { - struct ib_cq *cq = device->tx_cq[cq_index]; + struct ib_cq *cq = comp->tx_cq; struct ib_wc wc; struct iser_tx_desc *tx_desc; struct ib_conn *ib_conn; @@ -1222,20 +1214,18 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) static void 
iser_cq_tasklet_fn(unsigned long data) { - struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data; - struct iser_device *device = cq_desc->device; - int cq_index = cq_desc->cq_index; - struct ib_cq *cq = device->rx_cq[cq_index]; - struct ib_wc wc; - struct iser_rx_desc *desc; - unsigned long xfer_len; + struct iser_comp *comp = (struct iser_comp *)data; + struct ib_cq *cq = comp->rx_cq; + struct ib_wc wc; + struct iser_rx_desc *desc; + unsigned long xfer_len; struct ib_conn *ib_conn; int completed_tx, completed_rx = 0; /* First do tx drain, so in a case where we have rx flushes and a successful * tx completion we will still go through completion error handling. */ - completed_tx = iser_drain_tx_cq(device, cq_index); + completed_tx = iser_drain_tx_cq(comp); while (ib_poll_cq(cq, 1, &wc) == 1) { desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; @@ -1257,7 +1247,7 @@ static void iser_cq_tasklet_fn(unsigned long data) } completed_rx++; if (!(completed_rx & 63)) - completed_tx += iser_drain_tx_cq(device, cq_index); + completed_tx += iser_drain_tx_cq(comp); } /* #warning "it is assumed here that arming CQ only once its empty" * * " would not cause interrupts to be missed" */ @@ -1268,11 +1258,9 @@ static void iser_cq_tasklet_fn(unsigned long data) static void iser_cq_callback(struct ib_cq *cq, void *cq_context) { - struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context; - struct iser_device *device = cq_desc->device; - int cq_index = cq_desc->cq_index; + struct iser_comp *comp = cq_context; - tasklet_schedule(&device->cq_tasklet[cq_index]); + tasklet_schedule(&comp->tasklet); } u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, -- cgit v1.2.3 From 183cfa434ec90897b1423ce4f916e8a237139133 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:08 +0300 Subject: IB/iser: Use internal polling budget to avoid possible live-lock We need a way to guarantee that we don't stay in soft-IRQ context for too long. We might starve other pending CQ tasklets or, worse, lock against an application trying to issue IO on the running CPU. 
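A minimal sketch of the bounded loop shape (hypothetical function name; the budget is the module-local iser_cq_poll_limit this patch adds, and both the completion handling and the CQ re-arm that follows the loop are elided):

static void bounded_poll_sketch(struct ib_cq *cq, int budget)
{
	struct ib_wc wc;
	int completed = 0;

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		/* handle the work completion here */
		if (++completed >= budget)
			break;	/* yield soft-IRQ context; leftovers are
				 * polled on the next tasklet invocation */
	}
}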
Signed-off-by: Sagi Grimberg Signed-off-by: Roi Dayan Signed-off-by: Ariel Nahum Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 94d1b46b467a..e31ac57accc9 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -42,6 +42,8 @@ #define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) #define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) +static int iser_cq_poll_limit = 512; + static void iser_cq_tasklet_fn(unsigned long data); static void iser_cq_callback(struct ib_cq *cq, void *cq_context); static int iser_drain_tx_cq(struct iser_comp *comp); @@ -1248,6 +1250,8 @@ static void iser_cq_tasklet_fn(unsigned long data) completed_rx++; if (!(completed_rx & 63)) completed_tx += iser_drain_tx_cq(comp); + if (completed_rx >= iser_cq_poll_limit) + break; } /* #warning "it is assumed here that arming CQ only once its empty" * * " would not cause interrupts to be missed" */ -- cgit v1.2.3 From 6aabfa76f5e5281e5db128a34420d8f33b8574f7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:09 +0300 Subject: IB/iser: Use single CQ for RX and TX This will solve a possible condition where we might miss TX completion (flush error) during session teardown. Since we are using a single CQ, we don't need to actively drain the TX CQ, instead just wait for flush_completion (when counters reach zero) and remove iser_poll_for_flush_errors(). This patch might introduce a minor performance regression on its own, but the next patches will enhance performance using a single CQ for RX and TX. 
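Sharing one CQ has a subtlety the diff below handles with is_iser_tx_desc(): a FLUSH error completion carries no trustworthy opcode, so RX and TX work requests are told apart by whether wr_id points into the connection's rx_descs array. Roughly (simplified sketch, hypothetical helper name):

static inline bool is_tx_wr_sketch(struct iser_conn *iser_conn, u64 wr_id)
{
	void *start = iser_conn->rx_descs;
	void *end = start + iser_conn->num_rx_descs *
			    sizeof(*iser_conn->rx_descs);
	void *addr = (void *)(uintptr_t)wr_id;

	/* anything outside the RX descriptor array must be a TX descriptor */
	return addr < start || addr >= end;
}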
Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 9 +- drivers/infiniband/ulp/iser/iser_initiator.c | 3 +- drivers/infiniband/ulp/iser/iser_verbs.c | 227 +++++++++++++-------------- 3 files changed, 114 insertions(+), 125 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 2bc34aa50705..1617c5cce8b1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -271,16 +271,14 @@ struct iscsi_iser_task; * struct iser_comp - iSER completion context * * @device: pointer to device handle - * @rx_cq: RX completion queue - * @tx_cq: TX completion queue + * @cq: completion queue * @tasklet: Tasklet handle * @active_qps: Number of active QPs attached * to completion context */ struct iser_comp { struct iser_device *device; - struct ib_cq *rx_cq; - struct ib_cq *tx_cq; + struct ib_cq *cq; struct tasklet_struct tasklet; int active_qps; }; @@ -342,6 +340,7 @@ struct fast_reg_descriptor { * @device: reference to iser device * @comp: iser completion context * @pi_support: Indicate device T10-PI support + * @flush_comp: completes when all connection completions consumed * @lock: protects fmr/fastreg pool * @union.fmr: * @pool: FMR pool for fast registrations @@ -361,6 +360,7 @@ struct ib_conn { struct iser_device *device; struct iser_comp *comp; bool pi_support; + struct completion flush_comp; spinlock_t lock; union { struct { @@ -395,6 +395,7 @@ struct iser_conn { u64 login_req_dma, login_resp_dma; unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; + u32 num_rx_descs; }; struct iscsi_iser_task { diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 123174570c16..359c0b84f1ac 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -272,7 +272,8 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, if (iser_alloc_login_buf(iser_conn)) goto alloc_login_buf_fail; - iser_conn->rx_descs = kmalloc(session->cmds_max * + iser_conn->num_rx_descs = session->cmds_max; + iser_conn->rx_descs = kmalloc(iser_conn->num_rx_descs * sizeof(struct iser_rx_desc), GFP_KERNEL); if (!iser_conn->rx_descs) goto rx_desc_alloc_fail; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index e31ac57accc9..eedc27a0d3c3 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -39,14 +39,14 @@ #include "iscsi_iser.h" #define ISCSI_ISER_MAX_CONN 8 -#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) -#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) +#define ISER_MAX_RX_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) +#define ISER_MAX_TX_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) +#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN) static int iser_cq_poll_limit = 512; static void iser_cq_tasklet_fn(unsigned long data); static void iser_cq_callback(struct ib_cq *cq, void *cq_context); -static int iser_drain_tx_cq(struct iser_comp *comp); static void iser_cq_event_callback(struct ib_event *cause, void *context) { @@ -117,26 +117,17 @@ static int iser_create_device_ib_res(struct iser_device *device) struct iser_comp *comp = &device->comps[i]; comp->device = device; - comp->rx_cq = ib_create_cq(device->ib_device, - iser_cq_callback, - iser_cq_event_callback, - 
(void *)comp, - ISER_MAX_RX_CQ_LEN, i); - if (IS_ERR(comp->rx_cq)) { - comp->rx_cq = NULL; + comp->cq = ib_create_cq(device->ib_device, + iser_cq_callback, + iser_cq_event_callback, + (void *)comp, + ISER_MAX_CQ_LEN, i); + if (IS_ERR(comp->cq)) { + comp->cq = NULL; goto cq_err; } - comp->tx_cq = ib_create_cq(device->ib_device, NULL, - iser_cq_event_callback, - (void *)comp, - ISER_MAX_TX_CQ_LEN, i); - if (IS_ERR(comp->tx_cq)) { - comp->tx_cq = NULL; - goto cq_err; - } - - if (ib_req_notify_cq(comp->rx_cq, IB_CQ_NEXT_COMP)) + if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP)) goto cq_err; tasklet_init(&comp->tasklet, iser_cq_tasklet_fn, @@ -165,10 +156,8 @@ cq_err: for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; - if (comp->tx_cq) - ib_destroy_cq(comp->tx_cq); - if (comp->rx_cq) - ib_destroy_cq(comp->rx_cq); + if (comp->cq) + ib_destroy_cq(comp->cq); } ib_dealloc_pd(device->pd); pd_err: @@ -189,10 +178,8 @@ static void iser_free_device_ib_res(struct iser_device *device) struct iser_comp *comp = &device->comps[i]; tasklet_kill(&comp->tasklet); - ib_destroy_cq(comp->tx_cq); - ib_destroy_cq(comp->rx_cq); - comp->tx_cq = NULL; - comp->rx_cq = NULL; + ib_destroy_cq(comp->cq); + comp->cq = NULL; } (void)ib_unregister_event_handler(&device->event_handler); @@ -462,8 +449,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) init_attr.event_handler = iser_qp_event_callback; init_attr.qp_context = (void *)ib_conn; - init_attr.send_cq = ib_conn->comp->tx_cq; - init_attr.recv_cq = ib_conn->comp->rx_cq; + init_attr.send_cq = ib_conn->comp->cq; + init_attr.recv_cq = ib_conn->comp->cq; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; init_attr.cap.max_send_sge = 2; init_attr.cap.max_recv_sge = 1; @@ -640,33 +627,6 @@ void iser_conn_release(struct iser_conn *iser_conn) kfree(iser_conn); } -/** - * iser_poll_for_flush_errors - Don't settle for less than all. - * @struct ib_conn: IB context of the connection - * - * This routine is called when the QP is in error state - * It polls the send CQ until all flush errors are consumed and - * returns when all flush errors were processed. 
- */ -static void iser_poll_for_flush_errors(struct ib_conn *ib_conn) -{ - int count = 0; - - while (ib_conn->post_recv_buf_count > 0 || - atomic_read(&ib_conn->post_send_buf_count) > 0) { - msleep(100); - if (atomic_read(&ib_conn->post_send_buf_count) > 0) - iser_drain_tx_cq(ib_conn->comp); - - count++; - /* Don't flood with prints */ - if (count % 30 == 0) - iser_dbg("post_recv %d post_send %d", - ib_conn->post_recv_buf_count, - atomic_read(&ib_conn->post_send_buf_count)); - } -} - /** * triggers start of the disconnect procedures and wait for them to be done * Called with state mutex held @@ -698,7 +658,7 @@ int iser_conn_terminate(struct iser_conn *iser_conn) iser_err("Failed to disconnect, conn: 0x%p err %d\n", iser_conn, err); - iser_poll_for_flush_errors(ib_conn); + wait_for_completion(&ib_conn->flush_comp); } return 1; @@ -908,6 +868,7 @@ void iser_conn_init(struct iser_conn *iser_conn) iser_conn->state = ISER_CONN_INIT; iser_conn->ib_conn.post_recv_buf_count = 0; atomic_set(&iser_conn->ib_conn.post_send_buf_count, 0); + init_completion(&iser_conn->ib_conn.flush_comp); init_completion(&iser_conn->stop_completion); init_completion(&iser_conn->ib_completion); init_completion(&iser_conn->up_completion); @@ -1155,9 +1116,31 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc) return ib_ret; } +/** + * is_iser_tx_desc - Indicate if the completion wr_id + * is a TX descriptor or not. + * @iser_conn: iser connection + * @wr_id: completion WR identifier + * + * Since we cannot rely on wc opcode in FLUSH errors + * we must work around it by checking if the wr_id address + * falls in the iser connection rx_descs buffer. If so + * it is an RX descriptor, otherwize it is a TX. + */ +static inline bool +is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id) +{ + void *start = iser_conn->rx_descs; + int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs); + + if (wr_id >= start && wr_id < start + len) + return false; + + return true; +} + /** * iser_handle_comp_error() - Handle error completion - * @desc: iser TX descriptor * @ib_conn: connection RDMA resources * @wc: work completion * @@ -1167,8 +1150,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc) * connection is failed (in case we passed bind stage). */ static void -iser_handle_comp_error(struct iser_tx_desc *desc, - struct ib_conn *ib_conn, +iser_handle_comp_error(struct ib_conn *ib_conn, struct ib_wc *wc) { struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, @@ -1179,85 +1161,90 @@ iser_handle_comp_error(struct iser_tx_desc *desc, iscsi_conn_failure(iser_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); - if (desc && desc->type == ISCSI_TX_DATAOUT) - kmem_cache_free(ig.desc_cache, desc); + if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) { + struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id; + + atomic_dec(&ib_conn->post_send_buf_count); + if (desc->type == ISCSI_TX_DATAOUT) + kmem_cache_free(ig.desc_cache, desc); + } else { + ib_conn->post_recv_buf_count--; + } } -static int iser_drain_tx_cq(struct iser_comp *comp) +/** + * iser_handle_wc - handle a single work completion + * @wc: work completion + * + * Soft-IRQ context, work completion can be either + * SEND or RECV, and can turn out successful or + * with error (or flush error). 
+ */ +static void iser_handle_wc(struct ib_wc *wc) { - struct ib_cq *cq = comp->tx_cq; - struct ib_wc wc; - struct iser_tx_desc *tx_desc; struct ib_conn *ib_conn; - int completed_tx = 0; + struct iser_tx_desc *tx_desc; + struct iser_rx_desc *rx_desc; - while (ib_poll_cq(cq, 1, &wc) == 1) { - tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id; - ib_conn = wc.qp->qp_context; - if (wc.status == IB_WC_SUCCESS) { - if (wc.opcode == IB_WC_SEND) - iser_snd_completion(tx_desc, ib_conn); - else - iser_err("expected opcode %d got %d\n", - IB_WC_SEND, wc.opcode); + ib_conn = wc->qp->qp_context; + if (wc->status == IB_WC_SUCCESS) { + if (wc->opcode == IB_WC_RECV) { + rx_desc = (struct iser_rx_desc *)wc->wr_id; + iser_rcv_completion(rx_desc, wc->byte_len, + ib_conn); + } else + if (wc->opcode == IB_WC_SEND) { + tx_desc = (struct iser_tx_desc *)wc->wr_id; + iser_snd_completion(tx_desc, ib_conn); + atomic_dec(&ib_conn->post_send_buf_count); } else { - iser_err("tx id %llx status %d vend_err %x\n", - wc.wr_id, wc.status, wc.vendor_err); - if (wc.wr_id != ISER_FASTREG_LI_WRID) { - atomic_dec(&ib_conn->post_send_buf_count); - iser_handle_comp_error(tx_desc, ib_conn, &wc); - } + iser_err("Unknown wc opcode %d\n", wc->opcode); } - completed_tx++; + } else { + if (wc->status != IB_WC_WR_FLUSH_ERR) + iser_err("wr id %llx status %d vend_err %x\n", + wc->wr_id, wc->status, wc->vendor_err); + else + iser_dbg("flush error: wr id %llx\n", wc->wr_id); + + if (wc->wr_id != ISER_FASTREG_LI_WRID) + iser_handle_comp_error(ib_conn, wc); + + /* complete in case all flush errors were consumed */ + if (ib_conn->post_recv_buf_count == 0 && + atomic_read(&ib_conn->post_send_buf_count) == 0) + complete(&ib_conn->flush_comp); } - return completed_tx; } - +/** + * iser_cq_tasklet_fn - iSER completion polling loop + * @data: iSER completion context + * + * Soft-IRQ context, polling connection CQ until + * either CQ was empty or we exausted polling budget + */ static void iser_cq_tasklet_fn(unsigned long data) { struct iser_comp *comp = (struct iser_comp *)data; - struct ib_cq *cq = comp->rx_cq; + struct ib_cq *cq = comp->cq; struct ib_wc wc; - struct iser_rx_desc *desc; - unsigned long xfer_len; - struct ib_conn *ib_conn; - int completed_tx, completed_rx = 0; - - /* First do tx drain, so in a case where we have rx flushes and a successful - * tx completion we will still go through completion error handling. - */ - completed_tx = iser_drain_tx_cq(comp); + int completed = 0; while (ib_poll_cq(cq, 1, &wc) == 1) { - desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; - BUG_ON(desc == NULL); - ib_conn = wc.qp->qp_context; - if (wc.status == IB_WC_SUCCESS) { - if (wc.opcode == IB_WC_RECV) { - xfer_len = (unsigned long)wc.byte_len; - iser_rcv_completion(desc, xfer_len, ib_conn); - } else - iser_err("expected opcode %d got %d\n", - IB_WC_RECV, wc.opcode); - } else { - if (wc.status != IB_WC_WR_FLUSH_ERR) - iser_err("rx id %llx status %d vend_err %x\n", - wc.wr_id, wc.status, wc.vendor_err); - ib_conn->post_recv_buf_count--; - iser_handle_comp_error(NULL, ib_conn, &wc); - } - completed_rx++; - if (!(completed_rx & 63)) - completed_tx += iser_drain_tx_cq(comp); - if (completed_rx >= iser_cq_poll_limit) + iser_handle_wc(&wc); + + if (++completed >= iser_cq_poll_limit) break; } - /* #warning "it is assumed here that arming CQ only once its empty" * - * " would not cause interrupts to be missed" */ + + /* + * It is assumed here that arming CQ only once its empty + * would not cause interrupts to be missed. 
+ */ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); + iser_dbg("got %d completions\n", completed); } static void iser_cq_callback(struct ib_cq *cq, void *cq_context) -- cgit v1.2.3 From ff3dd52d267165347d6f92a90016e692d074a00c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:10 +0300 Subject: IB/iser: Use beacon to indicate all completions were consumed Avoid post_send counting (atomic) in the IO path just to keep track of how many completions we need to consume. Use a beacon post to indicate that all prior posts completed. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 5 +++-- drivers/infiniband/ulp/iser/iser_initiator.c | 8 ++----- drivers/infiniband/ulp/iser/iser_verbs.c | 32 ++++++++++++++++------------ 3 files changed, 23 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 1617c5cce8b1..4fcb25604d80 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -150,6 +150,7 @@ #define ISER_RSV 0x04 #define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL +#define ISER_BEACON_WRID 0xfffffffffffffffeULL struct iser_hdr { u8 flags; @@ -335,11 +336,11 @@ struct fast_reg_descriptor { * @cma_id: rdma_cm connection maneger handle * @qp: Connection Queue-pair * @post_recv_buf_count: post receive counter - * @post_send_buf_count: post send counter * @rx_wr: receive work request for batch posts * @device: reference to iser device * @comp: iser completion context * @pi_support: Indicate device T10-PI support + * @beacon: beacon send wr to signal all flush errors were drained * @flush_comp: completes when all connection completions consumed * @lock: protects fmr/fastreg pool * @union.fmr: @@ -355,11 +356,11 @@ struct ib_conn { struct rdma_cm_id *cma_id; struct ib_qp *qp; int post_recv_buf_count; - atomic_t post_send_buf_count; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; struct iser_comp *comp; bool pi_support; + struct ib_send_wr beacon; struct completion flush_comp; spinlock_t lock; union { diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 359c0b84f1ac..ffbdf922587a 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -350,12 +350,10 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) return 0; /* - * Check that there is one posted recv buffer (for the last login - * response) and no posted send buffers left - they must have been - * consumed during previous login phases. + * Check that there is one posted recv buffer + * (for the last login response). 
*/ WARN_ON(ib_conn->post_recv_buf_count != 1); - WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0); if (session->discovery_sess) { iser_info("Discovery session, re-using login RX buffer\n"); @@ -634,8 +632,6 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc, tx_desc = NULL; } - atomic_dec(&ib_conn->post_send_buf_count); - if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ task = (void *) ((long)(void *)tx_desc - diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index eedc27a0d3c3..805a9bdc9520 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -41,7 +41,8 @@ #define ISCSI_ISER_MAX_CONN 8 #define ISER_MAX_RX_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) #define ISER_MAX_TX_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) -#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN) +#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \ + ISCSI_ISER_MAX_CONN) static int iser_cq_poll_limit = 512; @@ -457,10 +458,10 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; if (ib_conn->pi_support) { - init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS; + init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; } else { - init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; + init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; } ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); @@ -634,6 +635,7 @@ void iser_conn_release(struct iser_conn *iser_conn) int iser_conn_terminate(struct iser_conn *iser_conn) { struct ib_conn *ib_conn = &iser_conn->ib_conn; + struct ib_send_wr *bad_wr; int err = 0; /* terminate the iser conn only if the conn state is UP */ @@ -658,6 +660,11 @@ int iser_conn_terminate(struct iser_conn *iser_conn) iser_err("Failed to disconnect, conn: 0x%p err %d\n", iser_conn, err); + /* post an indication that all flush errors were consumed */ + err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr); + if (err) + iser_err("conn %p failed to post beacon", ib_conn); + wait_for_completion(&ib_conn->flush_comp); } @@ -867,7 +874,6 @@ void iser_conn_init(struct iser_conn *iser_conn) { iser_conn->state = ISER_CONN_INIT; iser_conn->ib_conn.post_recv_buf_count = 0; - atomic_set(&iser_conn->ib_conn.post_send_buf_count, 0); init_completion(&iser_conn->ib_conn.flush_comp); init_completion(&iser_conn->stop_completion); init_completion(&iser_conn->ib_completion); @@ -900,6 +906,9 @@ int iser_connect(struct iser_conn *iser_conn, iser_conn->state = ISER_CONN_PENDING; + ib_conn->beacon.wr_id = ISER_BEACON_WRID; + ib_conn->beacon.opcode = IB_WR_SEND; + ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC); @@ -1106,13 +1115,10 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc) send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - atomic_inc(&ib_conn->post_send_buf_count); - ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); - if (ib_ret) { + if (ib_ret) iser_err("ib_post_send failed, ret:%d\n", ib_ret); - atomic_dec(&ib_conn->post_send_buf_count); - } + return ib_ret; } @@ -1164,7 +1170,6 @@ iser_handle_comp_error(struct ib_conn *ib_conn, if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) { struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id; - 
atomic_dec(&ib_conn->post_send_buf_count); if (desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, desc); } else { @@ -1196,7 +1201,6 @@ static void iser_handle_wc(struct ib_wc *wc) if (wc->opcode == IB_WC_SEND) { tx_desc = (struct iser_tx_desc *)wc->wr_id; iser_snd_completion(tx_desc, ib_conn); - atomic_dec(&ib_conn->post_send_buf_count); } else { iser_err("Unknown wc opcode %d\n", wc->opcode); } @@ -1207,12 +1211,12 @@ static void iser_handle_wc(struct ib_wc *wc) else iser_dbg("flush error: wr id %llx\n", wc->wr_id); - if (wc->wr_id != ISER_FASTREG_LI_WRID) + if (wc->wr_id != ISER_FASTREG_LI_WRID && + wc->wr_id != ISER_BEACON_WRID) iser_handle_comp_error(ib_conn, wc); /* complete in case all flush errors were consumed */ - if (ib_conn->post_recv_buf_count == 0 && - atomic_read(&ib_conn->post_send_buf_count) == 0) + if (wc->wr_id == ISER_BEACON_WRID) complete(&ib_conn->flush_comp); } } -- cgit v1.2.3 From 6e6fe2fb1d61b4baef1cf350049c6877583681ee Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:11 +0300 Subject: IB/iser: Optimize completion polling Poll in batches of 16. Since we don't want the batch on the stack, keep it under the iser completion context (iser_comp). Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 4 ++++ drivers/infiniband/ulp/iser/iser_verbs.c | 12 +++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 4fcb25604d80..6c3743b6860e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -145,6 +145,8 @@ ISER_MAX_TX_MISC_PDUS + \ ISER_MAX_RX_MISC_PDUS) +#define ISER_WC_BATCH_COUNT 16 + #define ISER_VER 0x10 #define ISER_WSV 0x08 #define ISER_RSV 0x04 @@ -273,6 +275,7 @@ struct iscsi_iser_task; * * @device: pointer to device handle * @cq: completion queue + * @wcs: work completion array * @tasklet: Tasklet handle * @active_qps: Number of active QPs attached * to completion context @@ -280,6 +283,7 @@ struct iscsi_iser_task; struct iser_comp { struct iser_device *device; struct ib_cq *cq; + struct ib_wc wcs[ISER_WC_BATCH_COUNT]; struct tasklet_struct tasklet; int active_qps; }; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 805a9bdc9520..82bedbc260b2 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1232,13 +1232,15 @@ static void iser_cq_tasklet_fn(unsigned long data) { struct iser_comp *comp = (struct iser_comp *)data; struct ib_cq *cq = comp->cq; - struct ib_wc wc; - int completed = 0; + struct ib_wc *const wcs = comp->wcs; + int i, n, completed = 0; - while (ib_poll_cq(cq, 1, &wc) == 1) { - iser_handle_wc(&wc); + while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) { + for (i = 0; i < n; i++) + iser_handle_wc(&wcs[i]); - if (++completed >= iser_cq_poll_limit) + completed += n; + if (completed >= iser_cq_poll_limit) break; } -- cgit v1.2.3 From 6df5a128f0fde6315a44e80b30412997147f5efd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:12 +0300 Subject: IB/iser: Suppress scsi command send completions Signal completion of every 32 scsi commands and suppress all the rest. We don't do anything upon getting the completion so no need to "just consume" it. Cleanup of scsi commands is done in the cleanup_task callback (a minimal sketch of the signaling rule follows). 
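Note that with mostly-unsignaled sends the send queue can only be reclaimed at signaled completions, so a completion must still be requested periodically. A minimal sketch of the selection rule (hypothetical helper name; constant and predicate shape taken from this patch):

static inline bool signal_comp_sketch(unsigned int sig_count)
{
	/* request a signaled completion once every ISER_SIGNAL_CMD_COUNT
	 * (32) commands so send queue resources can be reclaimed */
	return (sig_count % ISER_SIGNAL_CMD_COUNT) == 0;
}

The command path then sets send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0, as the diff below shows.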
Still keep dataout and control send completions as we may need to cleanup there. This helps reducing the amount of interrupts/completions in the IO path. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 5 +++-- drivers/infiniband/ulp/iser/iser_initiator.c | 13 ++++++++++--- drivers/infiniband/ulp/iser/iser_verbs.c | 5 +++-- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 6c3743b6860e..f4e9d621826e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -101,7 +101,6 @@ #define SHIFT_4K 12 #define SIZE_4K (1ULL << SHIFT_4K) #define MASK_4K (~(SIZE_4K-1)) - /* support up to 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) #define ISER_DEF_XMIT_CMDS_DEFAULT 512 @@ -146,6 +145,7 @@ ISER_MAX_RX_MISC_PDUS) #define ISER_WC_BATCH_COUNT 16 +#define ISER_SIGNAL_CMD_COUNT 32 #define ISER_VER 0x10 #define ISER_WSV 0x08 @@ -500,7 +500,8 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, int iser_post_recvl(struct iser_conn *iser_conn); int iser_post_recvm(struct iser_conn *iser_conn, int count); -int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc); +int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, + bool signal); int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data, diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index ffbdf922587a..5a489ea63732 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -369,6 +369,11 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) return 0; } +static inline bool iser_signal_comp(int sig_count) +{ + return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0); +} + /** * iser_send_command - send command PDU */ @@ -383,6 +388,7 @@ int iser_send_command(struct iscsi_conn *conn, struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; struct scsi_cmnd *sc = task->sc; struct iser_tx_desc *tx_desc = &iser_task->desc; + static unsigned sig_count; edtl = ntohl(hdr->data_length); @@ -428,7 +434,8 @@ int iser_send_command(struct iscsi_conn *conn, iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(&iser_conn->ib_conn, tx_desc); + err = iser_post_send(&iser_conn->ib_conn, tx_desc, + iser_signal_comp(++sig_count)); if (!err) return 0; @@ -493,7 +500,7 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(&iser_conn->ib_conn, tx_desc); + err = iser_post_send(&iser_conn->ib_conn, tx_desc, true); if (!err) return 0; @@ -555,7 +562,7 @@ int iser_send_control(struct iscsi_conn *conn, goto send_control_error; } - err = iser_post_send(&iser_conn->ib_conn, mdesc); + err = iser_post_send(&iser_conn->ib_conn, mdesc, true); if (!err) return 0; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 82bedbc260b2..67225bb82bb5 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1099,7 +1099,8 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) * * returns 0 on success, -1 on failure */ -int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc) +int iser_post_send(struct ib_conn *ib_conn, 
struct iser_tx_desc *tx_desc, + bool signal) { int ib_ret; struct ib_send_wr send_wr, *send_wr_failed; @@ -1113,7 +1114,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc) send_wr.sg_list = tx_desc->tx_sg; send_wr.num_sge = tx_desc->num_sge; send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = IB_SEND_SIGNALED; + send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0; ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); if (ib_ret) -- cgit v1.2.3 From bba0a3c9d7bd7f49ae38e7dfd87f6ee78c91317a Mon Sep 17 00:00:00 2001 From: Ariel Nahum Date: Wed, 1 Oct 2014 14:02:13 +0300 Subject: IB/iser: Change iscsi_conn_stop log level to info Match to the debug level of all functions in connect/disconnect flows. Signed-off-by: Ariel Nahum Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 81d69a30bcca..decf696e7ea5 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -410,7 +410,7 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) struct iscsi_conn *conn = cls_conn->dd_data; struct iser_conn *iser_conn = conn->dd_data; - iser_dbg("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn); + iser_info("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn); /* * Userspace may have goofed up and not bound the connection or -- cgit v1.2.3 From e9d49b82f1ff3f966af70d20b04bb0fae4432c24 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:14 +0300 Subject: IB/iser: Nit - add space after __func__ in iser logging Change logging: "iser:XXXX" to "iser: XXXX" Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index f4e9d621826e..eedcbf277baa 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -71,30 +71,30 @@ #define PFX DRV_NAME ": " #define DRV_VER "1.4.1" -#define iser_dbg(fmt, arg...) \ - do { \ - if (iser_debug_level > 2) \ - printk(KERN_DEBUG PFX "%s:" fmt,\ - __func__ , ## arg); \ +#define iser_dbg(fmt, arg...) \ + do { \ + if (iser_debug_level > 2) \ + printk(KERN_DEBUG PFX "%s: " fmt,\ + __func__ , ## arg); \ } while (0) #define iser_warn(fmt, arg...) \ do { \ if (iser_debug_level > 0) \ - pr_warn(PFX "%s:" fmt, \ + pr_warn(PFX "%s: " fmt, \ __func__ , ## arg); \ } while (0) #define iser_info(fmt, arg...) \ do { \ if (iser_debug_level > 1) \ - pr_info(PFX "%s:" fmt, \ + pr_info(PFX "%s: " fmt, \ __func__ , ## arg); \ } while (0) #define iser_err(fmt, arg...) 
\ do { \ - printk(KERN_ERR PFX "%s:" fmt, \ + printk(KERN_ERR PFX "%s: " fmt, \ __func__ , ## arg); \ } while (0) -- cgit v1.2.3 From cd88621a9e91ee791dee321cb81a45cb357855ee Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:15 +0300 Subject: IB/iser: Add/Fix kernel doc style descriptions in iscsi_iser.h - iser_hdr - iser_data_buf - iser_mem_reg - iser_regd_buf - iser_tx_desc - iser_rx_desc - iser_device - iser_pi_context - iser_conn - ib_conn - iser_comp - iscsi_iser_task - iser_global While we're at it, fix nit alignments in this file. This patch does not change any functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 235 +++++++++++++++++++++++++------ 1 file changed, 189 insertions(+), 46 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index eedcbf277baa..5250a125b79e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -154,12 +154,22 @@ #define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL #define ISER_BEACON_WRID 0xfffffffffffffffeULL +/** + * struct iser_hdr - iSER header + * + * @flags: flags support (zbva, remote_inv) + * @rsvd: reserved + * @write_stag: write rkey + * @write_va: write virtual address + * @read_stag: read rkey + * @read_va: read virtual address + */ struct iser_hdr { u8 flags; u8 rsvd[3]; - __be32 write_stag; /* write rkey */ + __be32 write_stag; __be64 write_va; - __be32 read_stag; /* read rkey */ + __be32 read_stag; __be64 read_va; } __attribute__((packed)); @@ -203,15 +213,25 @@ enum iser_data_dir { ISER_DIRS_NUM }; +/** + * struct iser_data_buf - iSER data buffer + * + * @buf: pointer to the sg list + * @size: num entries of this sg + * @data_len: total buffer byte len + * @dma_nents: returned by dma_map_sg + * @copy_buf: allocated copy buf for SGs unaligned + * for rdma which are copied + * @sg_single: SG-ified clone of a non SG SC or + * unaligned SG + */ struct iser_data_buf { - void *buf; /* pointer to the sg list */ - unsigned int size; /* num entries of this sg */ - unsigned long data_len; /* total data len */ - unsigned int dma_nents; /* returned by dma_map_sg */ - char *copy_buf; /* allocated copy buf for SGs unaligned * - * for rdma which are copied */ - struct scatterlist sg_single; /* SG-ified clone of a non SG SC or * - * unaligned SG */ + void *buf; + unsigned int size; + unsigned long data_len; + unsigned int dma_nents; + char *copy_buf; + struct scatterlist sg_single; }; /* fwd declarations */ @@ -219,6 +239,16 @@ struct iser_device; struct iscsi_iser_task; struct iscsi_endpoint; +/** + * struct iser_mem_reg - iSER memory registration info + * + * @lkey: MR local key + * @rkey: MR remote key + * @va: MR start address (buffer va) + * @len: MR length + * @mem_h: pointer to registration context (FMR/Fastreg) + * @is_mr: indicates whether we registered the buffer + */ struct iser_mem_reg { u32 lkey; u32 rkey; @@ -228,11 +258,20 @@ struct iser_mem_reg { int is_mr; }; +/** + * struct iser_regd_buf - iSER buffer registration desc + * + * @reg: memory registration info + * @virt_addr: virtual address of buffer + * @device: reference to iser device + * @direction: dma direction (for dma_unmap) + * @data_size: data buffer size in bytes + */ struct iser_regd_buf { - struct iser_mem_reg reg; /* memory registration info */ + struct iser_mem_reg reg; void *virt_addr; - struct iser_device *device; /* 
device->device for dma_unmap */ - enum dma_data_direction direction; /* direction for dma_unmap */ + struct iser_device *device; + enum dma_data_direction direction; unsigned int data_size; }; @@ -242,19 +281,39 @@ enum iser_desc_type { ISCSI_TX_DATAOUT }; +/** + * struct iser_tx_desc - iSER TX descriptor (for send wr_id) + * + * @iser_header: iser header + * @iscsi_header: iscsi header + * @type: command/control/dataout + * @dma_addr: header buffer dma_address + * @tx_sg: sg[0] points to iser/iscsi headers + * sg[1] optionally points to either of immediate data, + * unsolicited data-out or control + * @num_sge: number of sges used on this TX task + */ struct iser_tx_desc { struct iser_hdr iser_header; struct iscsi_hdr iscsi_header; enum iser_desc_type type; u64 dma_addr; - /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either - of immediate data, unsolicited data-out or control (login,text) */ struct ib_sge tx_sg[2]; int num_sge; }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ sizeof(u64) + sizeof(struct ib_sge))) +/** + * struct iser_rx_desc - iSER RX descriptor (for recv wr_id) + * + * @iser_header: iser header + * @iscsi_header: iscsi header + * @data: received data segment + * @dma_addr: receive buffer dma address + * @rx_sg: ib_sge of receive buffer + * @pad: for sense data TODO: Modify to maximum sense length supported + */ struct iser_rx_desc { struct iser_hdr iser_header; struct iscsi_hdr iscsi_header; @@ -288,13 +347,32 @@ struct iser_comp { int active_qps; }; +/** + * struct iser_device - iSER device handle + * + * @ib_device: RDMA device + * @pd: Protection Domain for this device + * @dev_attr: Device attributes container + * @mr: Global DMA memory region + * @event_handler: IB events handle routine + * @ig_list: entry in devices list + * @refcount: Reference counter, dominated by open iser connections + * @comps_used: Number of completion contexts used, Min between online + * cpus and device max completion vectors + * @comps: Dynamically allocated array of completion handlers + * Memory registration pool function pointers (FMR or Fastreg): + * @iser_alloc_rdma_reg_res: Allocation of memory regions pool + * @iser_free_rdma_reg_res: Free of memory regions pool + * @iser_reg_rdma_mem: Memory registration routine + * @iser_unreg_rdma_mem: Memory deregistration routine + */ struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; struct ib_device_attr dev_attr; struct ib_mr *mr; struct ib_event_handler event_handler; - struct list_head ig_list; /* entry in ig devices list */ + struct list_head ig_list; int refcount; int comps_used; struct iser_comp comps[ISER_MAX_CQ]; @@ -318,19 +396,33 @@ enum iser_reg_indicator { ISER_FASTREG_PROTECTED = 1 << 3, }; +/** + * struct iser_pi_context - Protection information context + * + * @prot_mr: protection memory region + * @prot_frpl: protection fastreg page list + * @sig_mr: signature feature enabled memory region + */ struct iser_pi_context { struct ib_mr *prot_mr; struct ib_fast_reg_page_list *prot_frpl; struct ib_mr *sig_mr; }; +/** + * struct fast_reg_descriptor - Fast registration descriptor + * + * @list: entry in connection fastreg pool + * @data_mr: data memory region + * @data_frpl: data fastreg page list + * @pi_ctx: protection information context + * @reg_indicators: fast registration indicators + */ struct fast_reg_descriptor { struct list_head list; - /* For fast registration - FRWR */ struct ib_mr *data_mr; struct ib_fast_reg_page_list *data_frpl; struct iser_pi_context *pi_ctx; - /* 
registration indicators container */ u8 reg_indicators; }; @@ -379,21 +471,49 @@ struct ib_conn { }; }; +/** + * struct iser_conn - iSER connection context + * + * @ib_conn: connection RDMA resources + * @iscsi_conn: link to matching iscsi connection + * @ep: transport handle + * @state: connection logical state + * @qp_max_recv_dtos: maximum number of data outs, corresponds + * to max number of post recvs + * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1) + * @min_posted_rx: (qp_max_recv_dtos >> 2) + * @name: connection peer portal + * @release_work: deferred work for release job + * @state_mutex: protects iser connection state + * @stop_completion: conn_stop completion + * @ib_completion: RDMA cleanup completion + * @up_completion: connection establishment completed + * (state is ISER_CONN_UP) + * @conn_list: entry in ig conn list + * @login_buf: login data buffer (stores login parameters) + * @login_req_buf: login request buffer + * @login_req_dma: login request buffer dma address + * @login_resp_buf: login response buffer + * @login_resp_dma: login response buffer dma address + * @rx_desc_head: head of rx_descs cyclic buffer + * @rx_descs: rx buffers array (cyclic buffer) + * @num_rx_descs: number of rx descriptors + */ struct iser_conn { struct ib_conn ib_conn; struct iscsi_conn *iscsi_conn; struct iscsi_endpoint *ep; - enum iser_conn_state state; /* rdma connection state */ - unsigned qp_max_recv_dtos; /* num of rx buffers */ - unsigned qp_max_recv_dtos_mask; /* above minus 1 */ - unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ + enum iser_conn_state state; + unsigned qp_max_recv_dtos; + unsigned qp_max_recv_dtos_mask; + unsigned min_posted_rx; char name[ISER_OBJECT_NAME_SIZE]; struct work_struct release_work; struct mutex state_mutex; struct completion stop_completion; struct completion ib_completion; struct completion up_completion; - struct list_head conn_list; /* entry in ig conn list */ + struct list_head conn_list; char *login_buf; char *login_req_buf, *login_resp_buf; @@ -403,18 +523,33 @@ struct iser_conn { u32 num_rx_descs; }; +/** + * struct iscsi_iser_task - iser task context + * + * @desc: TX descriptor + * @iser_conn: link to iser connection + * @status: current task status + * @sc: link to scsi command + * @command_sent: indicate if command was sent + * @dir: iser data direction + * @rdma_regd: task rdma registration desc + * @data: iser data buffer desc + * @data_copy: iser data copy buffer desc (bounce buffer) + * @prot: iser protection buffer desc + * @prot_copy: iser protection copy buffer desc (bounce buffer) + */ struct iscsi_iser_task { struct iser_tx_desc desc; struct iser_conn *iser_conn; enum iser_task_status status; struct scsi_cmnd *sc; - int command_sent; /* set if command sent */ - int dir[ISER_DIRS_NUM]; /* set if dir use*/ - struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ - struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ - struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. 
copy */ - struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */ - struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */ + int command_sent; + int dir[ISER_DIRS_NUM]; + struct iser_regd_buf rdma_regd[ISER_DIRS_NUM]; + struct iser_data_buf data[ISER_DIRS_NUM]; + struct iser_data_buf data_copy[ISER_DIRS_NUM]; + struct iser_data_buf prot[ISER_DIRS_NUM]; + struct iser_data_buf prot_copy[ISER_DIRS_NUM]; }; struct iser_page_vec { @@ -424,12 +559,20 @@ struct iser_page_vec { int data_size; }; +/** + * struct iser_global: iSER global context + * + * @device_list_mutex: protects device_list + * @device_list: iser devices global list + * @connlist_mutex: protects connlist + * @connlist: iser connections global list + * @desc_cache: kmem cache for tx dataout + */ struct iser_global { - struct mutex device_list_mutex;/* */ - struct list_head device_list; /* all iSER devices */ + struct mutex device_list_mutex; + struct list_head device_list; struct mutex connlist_mutex; - struct list_head connlist; /* all iSER IB connections */ - + struct list_head connlist; struct kmem_cache *desc_cache; }; @@ -449,9 +592,9 @@ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_data *hdr); void iscsi_iser_recv(struct iscsi_conn *conn, - struct iscsi_hdr *hdr, - char *rx_data, - int rx_data_len); + struct iscsi_hdr *hdr, + char *rx_data, + int rx_data_len); void iser_conn_init(struct iser_conn *iser_conn); @@ -462,7 +605,7 @@ int iser_conn_terminate(struct iser_conn *iser_conn); void iser_release_work(struct work_struct *work); void iser_rcv_completion(struct iser_rx_desc *desc, - unsigned long dto_xfer_len, + unsigned long dto_xfer_len, struct ib_conn *ib_conn); void iser_snd_completion(struct iser_tx_desc *desc, @@ -484,14 +627,14 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_connect(struct iser_conn *iser_conn, - struct sockaddr *src_addr, - struct sockaddr *dst_addr, - int non_blocking); +int iser_connect(struct iser_conn *iser_conn, + struct sockaddr *src_addr, + struct sockaddr *dst_addr, + int non_blocking); int iser_reg_page_vec(struct ib_conn *ib_conn, struct iser_page_vec *page_vec, - struct iser_mem_reg *mem_reg); + struct iser_mem_reg *mem_reg); void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); @@ -504,9 +647,9 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal); int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum iser_data_dir iser_dir, - enum dma_data_direction dma_dir); + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir); void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data); -- cgit v1.2.3 From dc05ac36f729b9f53465ad6e6985129470fa7fcd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 1 Oct 2014 14:02:16 +0300 Subject: IB/iser: Fix/add kernel-doc style description in iscsi_iser.c This patch does not change any functionality. 
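For reference, the kernel-doc shape these two patches move the driver toward is the standard one; a neutral template, not taken from the driver:

/**
 * function_name() - Short description of what the function does
 * @arg1: description of the first parameter
 * @arg2: description of the second parameter
 *
 * Longer notes on context, locking and side effects go here.
 *
 * Return: description of the possible return values.
 */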
Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 169 +++++++++++++++++++++++++++++-- 1 file changed, 160 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index decf696e7ea5..8c04c2d3cfd9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -102,9 +102,19 @@ MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)"); static struct workqueue_struct *release_wq; struct iser_global ig; +/** + * iscsi_iser_recv() - Process a successful recv completion + * @conn: iscsi connection + * @hdr: iscsi header + * @rx_data: buffer containing receive data payload + * @rx_data_len: length of rx_data + * + * Notes: In case of data length errors or iscsi PDU completion failures + * this routine will signal the iscsi layer of a connection failure. + */ void -iscsi_iser_recv(struct iscsi_conn *conn, - struct iscsi_hdr *hdr, char *rx_data, int rx_data_len) +iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, + char *rx_data, int rx_data_len) { int rc = 0; int datalen; @@ -135,12 +145,22 @@ error: iscsi_conn_failure(conn, rc); } -static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) +/** + * iscsi_iser_pdu_alloc() - allocate an iscsi-iser PDU + * @task: iscsi task + * @opcode: iscsi command opcode + * + * Notes: This routine can't fail, it just assigns the iscsi task + * hdr and max hdr size. + */ +static int +iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) { struct iscsi_iser_task *iser_task = task->dd_data; task->hdr = (struct iscsi_hdr *)&iser_task->desc.iscsi_header; task->hdr_max = sizeof(iser_task->desc.iscsi_header); + return 0; } @@ -165,11 +185,15 @@ int iser_initialize_task_headers(struct iscsi_task *task, iser_task->iser_conn = iser_conn; return 0; } + /** - * iscsi_iser_task_init - Initialize task + * iscsi_iser_task_init() - Initialize iscsi-iser task * @task: iscsi task * * Initialize the task for the scsi command or mgmt command. + * + * Return: Returns zero on success or -ENOMEM when failing + * to init task headers (dma mapping error). */ static int iscsi_iser_task_init(struct iscsi_task *task) @@ -191,7 +215,7 @@ iscsi_iser_task_init(struct iscsi_task *task) } /** - * iscsi_iser_mtask_xmit - xmit management(immediate) task + * iscsi_iser_mtask_xmit() - xmit management (immediate) task * @conn: iscsi connection * @task: task management task * @@ -249,6 +273,12 @@ iscsi_iser_task_xmit_unsol_data_exit: return error; } +/** + * iscsi_iser_task_xmit() - xmit iscsi-iser task + * @task: iscsi task + * + * Return: zero on success or the escalated $error on failure. + */ static int iscsi_iser_task_xmit(struct iscsi_task *task) { @@ -286,6 +316,14 @@ iscsi_iser_task_xmit(struct iscsi_task *task) return error; } +/** + * iscsi_iser_cleanup_task() - cleanup an iscsi-iser task + * @task: iscsi task + * + * Notes: In case the RDMA device is already NULL (might have + * been removed in a DEVICE_REMOVAL CM event) it will bail out + * without doing dma unmapping. + */ static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; @@ -310,7 +348,20 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) } } -static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) +/** + * iscsi_iser_check_protection() - check protection information status of task. 
+ * @task: iscsi task + * @sector: error sector if it exists (output) + * + * Return: zero if no data-integrity errors have occurred + * 0x1: data-integrity error occurred in the guard-block + * 0x2: data-integrity error occurred in the reference tag + * 0x3: data-integrity error occurred in the application tag + * + * In addition the error sector is marked. + */ +static u8 +iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) { struct iscsi_iser_task *iser_task = task->dd_data; @@ -322,8 +373,17 @@ static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) sector); } +/** + * iscsi_iser_conn_create() - create a new iscsi-iser connection + * @cls_session: iscsi class session + * @conn_idx: connection index within the session (for MCS) + * + * Return: iscsi_cls_conn when iscsi_conn_setup succeeds or NULL + * otherwise. + */ static struct iscsi_cls_conn * -iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) +iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, + uint32_t conn_idx) { struct iscsi_conn *conn; struct iscsi_cls_conn *cls_conn; @@ -342,9 +402,21 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) return cls_conn; } +/** + * iscsi_iser_conn_bind() - bind iscsi and iser connection structures + * @cls_session: iscsi class session + * @cls_conn: iscsi class connection + * @transport_eph: transport end-point handle + * @is_leading: indicate if this is the session leading connection (MCS) + * + * Return: zero on success, $error if iscsi_conn_bind fails and + * -EINVAL in case the end-point doesn't exist anymore or the iser + * connection state is not UP (teardown already started). + */ static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, - struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, + struct iscsi_cls_conn *cls_conn, + uint64_t transport_eph, int is_leading) { struct iscsi_conn *conn = cls_conn->dd_data; @@ -391,6 +463,14 @@ out: return error; } +/** + * iscsi_iser_conn_start() - start iscsi-iser connection + * @cls_conn: iscsi class connection + * + * Notes: Here iser initializes (or re-initializes) stop_completion, as + * from this point on iscsi must call conn_stop in session/connection + * teardown, so the iser transport must wait for it. + */ static int iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) { @@ -404,6 +484,16 @@ iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) return iscsi_conn_start(cls_conn); } +/** + * iscsi_iser_conn_stop() - stop iscsi-iser connection + * @cls_conn: iscsi class connection + * @flag: indicates if recover or terminate (passed as is) + * + * Notes: Calling iscsi_conn_stop might theoretically race with + * a DEVICE_REMOVAL event and dereference a previously freed RDMA device + * handle, so we call it under the iser state lock to protect against + * this kind of race. + */ static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { @@ -432,7 +522,14 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) } } -static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) +/** + * iscsi_iser_session_destroy() - destroy iscsi-iser session + * @cls_session: iscsi class session + * + * Removes and frees the iscsi host. 
+ */ +static void +iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); @@ -452,6 +549,16 @@ iser_dif_prot_caps(int prot_caps) SHOST_DIX_TYPE3_PROTECTION : 0); } +/** + * iscsi_iser_session_create() - create an iscsi-iser session + * @ep: iscsi end-point handle + * @cmds_max: maximum commands in this session + * @qdepth: session command queue depth + * @initial_cmdsn: initiator command sequence number + * + * Allocates and adds a scsi host, exposes DIF support if + * it exists, and sets up an iscsi session. + */ static struct iscsi_cls_session * iscsi_iser_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, uint16_t qdepth, @@ -564,6 +671,13 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, return 0; } +/** + * iscsi_iser_conn_get_stats() - get iscsi connection statistics + * @cls_conn: iscsi class connection + * @stats: iscsi stats to output + * + * Output connection statistics. + */ static void iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats) { @@ -612,6 +726,21 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, return len; } +/** + * iscsi_iser_ep_connect() - Initiate iSER connection establishment + * @shost: scsi_host + * @dst_addr: destination address + * @non_blocking: indicate if routine can block + * + * Allocate an iscsi endpoint, an iser_conn structure and bind them. + * After that start RDMA connection establishment via rdma_cm. We + * don't allocate iser_conn embedded in iscsi_endpoint since in teardown + * the endpoint will be destroyed at ep_disconnect while iser_conn will + * clean up its resources asynchronously. + * + * Return: iscsi_endpoint created by the iscsi layer or ERR_PTR(error) + * on failure. + */ static struct iscsi_endpoint * iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, int non_blocking) @@ -644,6 +773,19 @@ failure: return ERR_PTR(err); } +/** + * iscsi_iser_ep_poll() - poll for iser connection establishment to complete + * @ep: iscsi endpoint (created at ep_connect) + * @timeout_ms: polling timeout allowed in ms. + * + * This routine boils down to waiting for up_completion signaling + * that cma_id got the CONNECTED event. + * + * Return: 1 if succeeded in connection establishment, 0 if timeout expired + * (libiscsi will retry) or -1 if interrupted by a signal or, + * more likely, the iser connection state transitioned to TERMINATING or + * DOWN during the wait period. + */ static int iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) { @@ -672,6 +814,15 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) return rc; /* signal */ } +/** + * iscsi_iser_ep_disconnect() - Initiate connection teardown process + * @ep: iscsi endpoint handle + * + * This routine does not block on iser and RDMA termination process + * completion, as we queue deferred work for the iser/RDMA destruction + * and cleanup, or actually call it immediately in case we didn't pass + * the iscsi conn bind/start stage, thus it is safe. 
+ */ static void iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) { -- cgit v1.2.3 From b261aeafe115256c9b4589dd7bd4ca877eb0fa6c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 1 Oct 2014 14:02:17 +0300 Subject: IB/iser: Bump version, add maintainer Update the driver version and add Sagi Grimberg as maintainer Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 5250a125b79e..cd4174ca9a76 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -69,7 +69,7 @@ #define DRV_NAME "iser" #define PFX DRV_NAME ": " -#define DRV_VER "1.4.1" +#define DRV_VER "1.4.8" #define iser_dbg(fmt, arg...) \ do { \ -- cgit v1.2.3 From eefd56e5895540ea78979b7845f76c9bb94c8fed Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 14 Sep 2014 16:47:50 +0300 Subject: IB/mlx5: Clear umr resources after ib_unregister_device Some ULPs may make use of resources created in create_umr_res so make sure to call destroy_umrc_res after returning from ib_unregister_device, which makes sure all ULPs have closed their resources. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d8907b20522a..ecd3aebc46fe 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1414,8 +1414,8 @@ err_dealloc: static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) { struct mlx5_ib_dev *dev = context; - destroy_umrc_res(dev); ib_unregister_device(&dev->ib_dev); + destroy_umrc_res(dev); destroy_dev_resources(&dev->devr); free_comp_eqs(dev); ib_dealloc_device(&dev->ib_dev); -- cgit v1.2.3 From 900a6d79176ea9172e19f86aa324e9198b40339c Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 14 Sep 2014 16:47:51 +0300 Subject: IB/mlx5: Improve debug prints in mlx5_ib_reg_user_mr Print access flags and error code from ib_umem_get. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 80b3c63eab5d..8ee7cb46e059 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -881,12 +881,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int order; int err; - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n", - start, virt_addr, length); + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, virt_addr, length, access_flags); umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); if (IS_ERR(umem)) { - mlx5_ib_dbg(dev, "umem get failed\n"); + mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); return (void *)umem; } -- cgit v1.2.3 From 377b513485fd885dea1083a9a5430df65b35e048 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 14 Sep 2014 16:47:52 +0300 Subject: IB/core: Avoid leakage from kernel to user space Clear the reserved field of struct ib_uverbs_async_event_desc which is copied to user space. 
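The bug class is worth illustrating: every byte of a structure copied to user space must be written explicitly, including reserved fields and compiler padding, otherwise stale kernel memory leaks out. A generic sketch with a hypothetical struct, not the uverbs code:

struct example_event {
        __u64 element;
        __u32 event_type;
        __u32 reserved;  /* leaks stale kernel data if left uninitialized */
};

static void example_fill_event(struct example_event *ev,
                               __u64 element, __u32 event_type)
{
        memset(ev, 0, sizeof(*ev));  /* zeroes reserved fields and padding */
        ev->element = element;
        ev->event_type = event_type;
}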
Signed-off-by: Eli Cohen Reviewed-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index c73b22a257fe..b21af1eaf503 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -477,6 +477,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, entry->desc.async.element = element; entry->desc.async.event_type = event; + entry->desc.async.reserved = 0; entry->counter = counter; list_add_tail(&entry->list, &file->async_file->event_list); -- cgit v1.2.3 From 1c3ce90d0acb0c661910dd84f416b3053e9710af Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 14 Sep 2014 16:47:53 +0300 Subject: IB/mlx5: Fix possible array overflow The check to verify that userspace does not provide an invalid index to the micro UAR was placed too late. Fix this by moving the check before using the index. Reported by: Shachar Raindel Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ecd3aebc46fe..a24431746377 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -650,13 +650,13 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return -EINVAL; idx = get_index(vma->vm_pgoff); + if (idx >= uuari->num_uars) + return -EINVAL; + pfn = uar_index2pfn(dev, uuari->uars[idx].index); mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, (unsigned long long)pfn); - if (idx >= uuari->num_uars) - return -EINVAL; - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot)) -- cgit v1.2.3 From f83b42636a91e63f330ea90996646b4a885aca74 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 14 Sep 2014 16:47:54 +0300 Subject: IB/mlx5: Remove duplicate code from mlx5_set_path Some of the fields were set twice. Re-organize to avoid that. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8c574b63d77b..34b92fc345b7 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1302,6 +1302,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->rlid = cpu_to_be16(ah->dlid); if (ah->ah_flags & IB_AH_GRH) { + if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) { + pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n", + ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len); + return -EINVAL; + } path->grh_mlid |= 1 << 7; path->mgid_index = ah->grh.sgid_index; path->hop_limit = ah->grh.hop_limit; @@ -1317,22 +1322,6 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->static_rate = err; path->port = port; - if (ah->ah_flags & IB_AH_GRH) { - if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) { - pr_err(KERN_ERR "sgid_index (%u) too large. 
max is %d\n", - ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len); - return -EINVAL; - } - - path->grh_mlid |= 1 << 7; - path->mgid_index = ah->grh.sgid_index; - path->hop_limit = ah->grh.hop_limit; - path->tclass_flowlabel = - cpu_to_be32((ah->grh.traffic_class << 20) | - (ah->grh.flow_label)); - memcpy(path->rgid, ah->grh.dgid.raw, 16); - } - if (attr_mask & IB_QP_TIMEOUT) path->ackto_lt = attr->timeout << 3; -- cgit v1.2.3 From f39f86971c0cded8c2563e7dfd82c650ca9c0044 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 14 Sep 2014 16:47:55 +0300 Subject: IB/mlx5: Modify to work with arbitrary page size When dealing with umem objects, the driver assumed host page sizes defined by PAGE_SHIFT. Modify the code to use arbitrary page shift provided by umem->page_shift to support different page sizes. Signed-off-by: Yishai Hadas Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mem.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index a3e81444c825..dae07eae9507 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -55,16 +55,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, u64 pfn; struct scatterlist *sg; int entry; + unsigned long page_shift = ilog2(umem->page_size); - addr = addr >> PAGE_SHIFT; + addr = addr >> page_shift; tmp = (unsigned long)addr; m = find_first_bit(&tmp, sizeof(tmp)); skip = 1 << m; mask = skip - 1; i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - pfn = sg_dma_address(sg) >> PAGE_SHIFT; + len = sg_dma_len(sg) >> page_shift; + pfn = sg_dma_address(sg) >> page_shift; for (k = 0; k < len; k++) { if (!(i & mask)) { tmp = (unsigned long)pfn; @@ -103,14 +104,15 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, *ncont = 0; } - *shift = PAGE_SHIFT + m; + *shift = page_shift + m; *count = i; } void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, __be64 *pas, int umr) { - int shift = page_shift - PAGE_SHIFT; + unsigned long umem_page_shift = ilog2(umem->page_size); + int shift = page_shift - umem_page_shift; int mask = (1 << shift) - 1; int i, k; u64 cur = 0; @@ -121,11 +123,11 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; + len = sg_dma_len(sg) >> umem_page_shift; base = sg_dma_address(sg); for (k = 0; k < len; k++) { if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); + cur = base + (k << umem_page_shift); if (umr) cur |= 3; @@ -134,7 +136,7 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, i >> shift, be64_to_cpu(pas[i >> shift])); } else mlx5_ib_dbg(dev, "=====> 0x%llx\n", - base + (k << PAGE_SHIFT)); + base + (k << umem_page_shift)); i++; } } -- cgit v1.2.3 From fd22f78cf7b95102d8e5b988afe27165e47471fc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Aug 2014 19:54:29 +0300 Subject: IB/mlx5: Use enumerations for PI copy mask In case input and output space parameters match, we can use a copy mask from input and output space. Use enums for those. 
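The copy_byte_mask selects which bytes of the 8-byte DIF tuple (2 guard bytes, 2 application-tag bytes, 4 reference-tag bytes) are carried over between the memory and wire domains. Judging by the literals they replace, the enums presumably read:

enum {
        MLX5_CPY_GRD_MASK = 0xc0,  /* 2 guard bytes */
        MLX5_CPY_APP_MASK = 0x30,  /* 2 application-tag bytes */
        MLX5_CPY_REF_MASK = 0x0f,  /* 4 reference-tag bytes */
};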
Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8c574b63d77b..554410c2655d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2095,11 +2095,11 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, /* Same block structure */ basic->bsf_size_sbs = 1 << 4; if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) - basic->wire.copy_byte_mask |= 0xc0; + basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) - basic->wire.copy_byte_mask |= 0x30; + basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK; if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) - basic->wire.copy_byte_mask |= 0x0f; + basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK; } else basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); -- cgit v1.2.3 From 6f5f8a016edf6c469d01c9bc563f6d74d12672d9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Aug 2014 19:54:30 +0300 Subject: IB/iser: Remove redundant assignment We clear the struct beforehand - no need for the zero assignment. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index de4db762dc77..d4f12245867e 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -510,7 +510,6 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) { switch (scsi_get_prot_type(sc)) { case SCSI_PROT_DIF_TYPE0: - *mask = 0x0; break; case SCSI_PROT_DIF_TYPE1: case SCSI_PROT_DIF_TYPE2: -- cgit v1.2.3 From f043032ef1acd294e0360bec96c91b90dec7cc17 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Aug 2014 19:54:31 +0300 Subject: IB/iser: Set IP_CSUM as default guard type In the future this will be a per-command parameter so we can lose it, but in the meantime IP_CSUM is a lot lighter for SW layers to compute, so set it as the default.
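The cost argument follows from the arithmetic involved: the IP checksum is a 16-bit ones' complement sum, while the T10-DIF guard is a per-byte polynomial CRC16. A rough sketch of the checksum, illustrative only and not the kernel's optimized implementation:

static u16 example_ip_checksum(const u8 *buf, size_t len)
{
        u32 sum = 0;
        size_t i;

        /* sum big-endian 16-bit words; plain adds, easy to vectorize */
        for (i = 0; i + 1 < len; i += 2)
                sum += (buf[i] << 8) | buf[i + 1];
        if (len & 1)
                sum += buf[len - 1] << 8;

        /* fold the carries back in (ones' complement addition) */
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);

        return (u16)~sum;
}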
Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 8c04c2d3cfd9..f42ab14105ac 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -83,7 +83,7 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); int iser_debug_level = 0; bool iser_pi_enable = false; -int iser_pi_guard = 0; +int iser_pi_guard = 1; MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover"); MODULE_LICENSE("Dual BSD/GPL"); @@ -97,7 +97,7 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644); MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); module_param_named(pi_guard, iser_pi_guard, int, 0644); -MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)"); +MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)"); static struct workqueue_struct *release_wq; struct iser_global ig; -- cgit v1.2.3 From 142537f4e5f7ffd3e34b0c46646ac9cb5d986d06 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Aug 2014 19:54:32 +0300 Subject: IB/mlx5: Use extended internal signature layout Rather than using the basic BSF layout which utilizes a pre-configured signature settings (sufficient for current DIF implementation), we use the extended BSF layout to expose advanced signature settings. These settings will also be exposed to the user later. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 80 +++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 43 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 554410c2655d..13924a256290 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2020,53 +2020,47 @@ static u8 bs_selector(int block_size) } } -static int format_selector(struct ib_sig_attrs *attr, - struct ib_sig_domain *domain, - int *selector) +static int mlx5_fill_inl_bsf(struct ib_sig_domain *domain, + struct mlx5_bsf_inl *inl) { - -#define FORMAT_DIF_NONE 0 -#define FORMAT_DIF_CRC_INC 8 -#define FORMAT_DIF_CRC_NO_INC 12 -#define FORMAT_DIF_CSUM_INC 13 -#define FORMAT_DIF_CSUM_NO_INC 14 + /* Valid inline section and allow BSF refresh */ + inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | + MLX5_BSF_REFRESH_DIF); + inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); + inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); switch (domain->sig.dif.type) { case IB_T10DIF_NONE: /* No DIF */ - *selector = FORMAT_DIF_NONE; break; case IB_T10DIF_TYPE1: /* Fall through */ case IB_T10DIF_TYPE2: - switch (domain->sig.dif.bg_type) { - case IB_T10DIF_CRC: - *selector = FORMAT_DIF_CRC_INC; - break; - case IB_T10DIF_CSUM: - *selector = FORMAT_DIF_CSUM_INC; - break; - default: - return 1; - } + inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? + MLX5_DIF_CRC : MLX5_DIF_IPCS; + /* + * increment reftag and don't check if + * apptag=0xffff and reftag=0xffffffff + */ + inl->dif_inc_ref_guard_check = MLX5_BSF_INC_REFTAG | + MLX5_BSF_APPREF_ESCAPE; + inl->dif_app_bitmask_check = 0xffff; + /* repeating block */ + inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; break; case IB_T10DIF_TYPE3: - switch (domain->sig.dif.bg_type) { - case IB_T10DIF_CRC: - *selector = domain->sig.dif.type3_inc_reftag ? 
- FORMAT_DIF_CRC_INC : - FORMAT_DIF_CRC_NO_INC; - break; - case IB_T10DIF_CSUM: - *selector = domain->sig.dif.type3_inc_reftag ? - FORMAT_DIF_CSUM_INC : - FORMAT_DIF_CSUM_NO_INC; - break; - default: - return 1; - } + inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? + MLX5_DIF_CRC : MLX5_DIF_IPCS; + /* + * Don't inc reftag and don't check if + * apptag=0xffff and reftag=0xffffffff + */ + inl->dif_inc_ref_guard_check = MLX5_BSF_APPREF_ESCAPE; + inl->dif_app_bitmask_check = 0xffff; + /* Repeating block */ + inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; break; default: - return 1; + return -EINVAL; } return 0; @@ -2080,7 +2074,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, struct mlx5_bsf_basic *basic = &bsf->basic; struct ib_sig_domain *mem = &sig_attrs->mem; struct ib_sig_domain *wire = &sig_attrs->wire; - int ret, selector; + int ret; memset(bsf, 0, sizeof(*bsf)); switch (sig_attrs->mem.sig_type) { @@ -2088,12 +2082,14 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) return -EINVAL; + /* Basic + Extended + Inline */ + basic->bsf_size_sbs = 1 << 7; /* Input domain check byte mask */ basic->check_byte_mask = sig_attrs->check_mask; if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && mem->sig.dif.type == wire->sig.dif.type) { /* Same block structure */ - basic->bsf_size_sbs = 1 << 4; + basic->bsf_size_sbs |= 1 << 4; if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) @@ -2105,18 +2101,16 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); basic->raw_data_size = cpu_to_be32(data_size); + basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); + basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); - ret = format_selector(sig_attrs, mem, &selector); + ret = mlx5_fill_inl_bsf(wire, &bsf->w_inl); if (ret) return -EINVAL; - basic->m_bfs_psv = cpu_to_be32(selector << 24 | - msig->psv_memory.psv_idx); - ret = format_selector(sig_attrs, wire, &selector); + ret = mlx5_fill_inl_bsf(mem, &bsf->m_inl); if (ret) return -EINVAL; - basic->w_bfs_psv = cpu_to_be32(selector << 24 | - msig->psv_wire.psv_idx); break; default: -- cgit v1.2.3 From 92792c0a197db85e9d3039b59d2211ed0157e824 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Aug 2014 19:54:33 +0300 Subject: IB/iser: Centralize ib_sig_domain settings Later there will be more parameters to set, so we want to do it in a centralized place. This patch does not change any functionality. 
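For context, the parameters gathered into the new helper describe the 8-byte protection information tuple carried with each block; its layout is roughly as follows, mirroring the kernel's struct t10_pi_tuple:

struct example_t10_pi_tuple {
        __be16 guard_tag;  /* checksum (CRC16 or IP csum) over the block */
        __be16 app_tag;    /* application tag, opaque to the transport */
        __be32 ref_tag;    /* typically the low 32 bits of the LBA */
};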
Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d4f12245867e..d9ed6234c505 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -457,44 +457,44 @@ scsi2ib_prot_type(unsigned char prot_type) } } +static inline void +iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs, + struct ib_sig_domain *domain) +{ + unsigned char scsi_ptype = scsi_get_prot_type(sc); + + domain->sig.dif.type = scsi2ib_prot_type(scsi_ptype); + domain->sig.dif.pi_interval = sc->device->sector_size; + domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff; +}; static int iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) { - unsigned char scsi_ptype = scsi_get_prot_type(sc); - sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; - sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size; - sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size; switch (scsi_get_prot_op(sc)) { case SCSI_PROT_WRITE_INSERT: case SCSI_PROT_READ_STRIP: sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; - sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire); sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & - 0xffffffff; break; case SCSI_PROT_READ_INSERT: case SCSI_PROT_WRITE_STRIP: - sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); - sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & - 0xffffffff; sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; + iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); + sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM : + IB_T10DIF_CRC; break; case SCSI_PROT_READ_PASS: case SCSI_PROT_WRITE_PASS: - sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); - sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & - 0xffffffff; - sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire); sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & - 0xffffffff; + iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); + sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM : + IB_T10DIF_CRC; break; default: iser_err("Unsupported PI operation %d\n", -- cgit v1.2.3 From 3d73cf1a2a05cca7b43f7a0c16d1077065b38385 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Aug 2014 19:54:34 +0300 Subject: Target/iser: Centralize ib_sig_domain setting Later there will be more parameters to set, so we want to do it in a centralized place. This patch does not change any functionality. 
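Summarizing the resulting domain layout per protection operation, as derived from the hunks below (INSERT/STRIP carry protection information on only one side of the transfer):

/*
 *   TARGET_PROT_DIN_INSERT / DOUT_STRIP:  mem = none,    wire = T10-DIF
 *   TARGET_PROT_DOUT_INSERT / DIN_STRIP:  mem = T10-DIF, wire = none
 *   TARGET_PROT_DIN_PASS   / DOUT_PASS:   mem = T10-DIF, wire = T10-DIF
 */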
Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/isert/ib_isert.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index da8ff124762a..73092756460b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2626,41 +2626,40 @@ se2ib_prot_type(enum target_prot_type prot_type) } } +static inline void +isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs, + struct ib_sig_domain *domain) +{ + enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type); + + domain->sig.dif.type = ib_prot_type; + domain->sig.dif.bg_type = IB_T10DIF_CRC; + domain->sig.dif.pi_interval = se_cmd->se_dev->dev_attrib.block_size; + domain->sig.dif.ref_tag = se_cmd->reftag_seed; +}; + static int isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) { - enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type); sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; - sig_attrs->mem.sig.dif.pi_interval = - se_cmd->se_dev->dev_attrib.block_size; - sig_attrs->wire.sig.dif.pi_interval = - se_cmd->se_dev->dev_attrib.block_size; switch (se_cmd->prot_op) { case TARGET_PROT_DIN_INSERT: case TARGET_PROT_DOUT_STRIP: sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; - sig_attrs->wire.sig.dif.type = ib_prot_type; - sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed; + isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire); break; case TARGET_PROT_DOUT_INSERT: case TARGET_PROT_DIN_STRIP: - sig_attrs->mem.sig.dif.type = ib_prot_type; - sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed; sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; + isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem); break; case TARGET_PROT_DIN_PASS: case TARGET_PROT_DOUT_PASS: - sig_attrs->mem.sig.dif.type = ib_prot_type; - sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed; - sig_attrs->wire.sig.dif.type = ib_prot_type; - sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; - sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed; + isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire); + isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem); break; default: pr_err("Unsupported PI operation %d\n", se_cmd->prot_op); -- cgit v1.2.3 From 78eda2bb6542057b214af3bc1cae09c63e65d1d1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Aug 2014 19:54:35 +0300 Subject: IB/mlx5, iser, isert: Add Signature API additions Expose more signature setting parameters. We modify the signature API to allow usage of some new execution parameters relevant to data integrity feature. This patch modifies ib_sig_domain structure by: - Deprecate DIF type in signature API (operation will be determined by the parameters alone, no DIF type awareness) - Add APPTAG check bitmask (for input domain) - Add REFTAG remap (increment) flag for each domain - Add APPTAG/REFTAG escape options for each domain The mlx5 driver is modified to follow the new parameters in HW signature setup. At the moment the callers (iser/isert) hard-code new parameters (by DIF type). In the future, callers will retrieve them from the scsi command structure. 
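Put together, the new fields let a caller spell out, for example, Type 1 semantics explicitly instead of passing a DIF-type enum. A sketch assembled from the hunks below; the function name is illustrative:

static void example_fill_type1_domain(struct ib_sig_domain *domain,
                                      u32 lba_seed, u16 block_size)
{
        domain->sig_type = IB_SIG_TYPE_T10_DIF;
        domain->sig.dif.bg_type = IB_T10DIF_CRC;
        domain->sig.dif.pi_interval = block_size;
        domain->sig.dif.ref_tag = lba_seed;
        /* check all application-tag bits on the input domain */
        domain->sig.dif.apptag_check_mask = 0xffff;
        /* skip checks for blocks escaped with the app/ref-tag sentinels */
        domain->sig.dif.app_escape = true;
        domain->sig.dif.ref_escape = true;
        /* Type 1/2 semantics: the reference tag increments per block */
        domain->sig.dif.ref_remap = true;
}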
Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 104 +++++++++++++----------------- drivers/infiniband/ulp/iser/iser_memory.c | 50 +++++++------- drivers/infiniband/ulp/isert/ib_isert.c | 40 ++++-------- 3 files changed, 83 insertions(+), 111 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 13924a256290..d7f35e9e6522 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2020,50 +2020,31 @@ static u8 bs_selector(int block_size) } } -static int mlx5_fill_inl_bsf(struct ib_sig_domain *domain, - struct mlx5_bsf_inl *inl) +static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain, + struct mlx5_bsf_inl *inl) { /* Valid inline section and allow BSF refresh */ inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | MLX5_BSF_REFRESH_DIF); inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); + /* repeating block */ + inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; + inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? + MLX5_DIF_CRC : MLX5_DIF_IPCS; - switch (domain->sig.dif.type) { - case IB_T10DIF_NONE: - /* No DIF */ - break; - case IB_T10DIF_TYPE1: /* Fall through */ - case IB_T10DIF_TYPE2: - inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? - MLX5_DIF_CRC : MLX5_DIF_IPCS; - /* - * increment reftag and don't check if - * apptag=0xffff and reftag=0xffffffff - */ - inl->dif_inc_ref_guard_check = MLX5_BSF_INC_REFTAG | - MLX5_BSF_APPREF_ESCAPE; - inl->dif_app_bitmask_check = 0xffff; - /* repeating block */ - inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; - break; - case IB_T10DIF_TYPE3: - inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? - MLX5_DIF_CRC : MLX5_DIF_IPCS; - /* - * Don't inc reftag and don't check if - * apptag=0xffff and reftag=0xffffffff - */ - inl->dif_inc_ref_guard_check = MLX5_BSF_APPREF_ESCAPE; - inl->dif_app_bitmask_check = 0xffff; - /* Repeating block */ - inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; - break; - default: - return -EINVAL; + if (domain->sig.dif.ref_remap) + inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG; + + if (domain->sig.dif.app_escape) { + if (domain->sig.dif.ref_escape) + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE; + else + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE; } - return 0; + inl->dif_app_bitmask_check = + cpu_to_be16(domain->sig.dif.apptag_check_mask); } static int mlx5_set_bsf(struct ib_mr *sig_mr, @@ -2074,20 +2055,35 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, struct mlx5_bsf_basic *basic = &bsf->basic; struct ib_sig_domain *mem = &sig_attrs->mem; struct ib_sig_domain *wire = &sig_attrs->wire; - int ret; memset(bsf, 0, sizeof(*bsf)); + + /* Basic + Extended + Inline */ + basic->bsf_size_sbs = 1 << 7; + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + basic->raw_data_size = cpu_to_be32(data_size); + + /* Memory domain */ switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_NONE: + break; case IB_SIG_TYPE_T10_DIF: - if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) - return -EINVAL; + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); + mlx5_fill_inl_bsf(mem, &bsf->m_inl); + break; + default: + return -EINVAL; + } - /* Basic + Extended + Inline */ - basic->bsf_size_sbs = 1 << 7; - /* Input domain check byte mask */ - basic->check_byte_mask = sig_attrs->check_mask; + /* Wire domain */ + 
switch (sig_attrs->wire.sig_type) { + case IB_SIG_TYPE_NONE: + break; + case IB_SIG_TYPE_T10_DIF: if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && - mem->sig.dif.type == wire->sig.dif.type) { + mem->sig_type == wire->sig_type) { /* Same block structure */ basic->bsf_size_sbs |= 1 << 4; if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) @@ -2099,20 +2095,9 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, } else basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); - basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); - basic->raw_data_size = cpu_to_be32(data_size); - basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); - - ret = mlx5_fill_inl_bsf(wire, &bsf->w_inl); - if (ret) - return -EINVAL; - - ret = mlx5_fill_inl_bsf(mem, &bsf->m_inl); - if (ret) - return -EINVAL; + mlx5_fill_inl_bsf(wire, &bsf->w_inl); break; - default: return -EINVAL; } @@ -2311,20 +2296,21 @@ static int set_psv_wr(struct ib_sig_domain *domain, memset(psv_seg, 0, sizeof(*psv_seg)); psv_seg->psv_num = cpu_to_be32(psv_idx); switch (domain->sig_type) { + case IB_SIG_TYPE_NONE: + break; case IB_SIG_TYPE_T10_DIF: psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | domain->sig.dif.app_tag); psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); - - *seg += sizeof(*psv_seg); - *size += sizeof(*psv_seg) / 16; break; - default: pr_err("Bad signature type given.\n"); return 1; } + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + return 0; } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d9ed6234c505..6c5ce357fba6 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -440,51 +440,44 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, return 0; } -static inline enum ib_t10_dif_type -scsi2ib_prot_type(unsigned char prot_type) -{ - switch (prot_type) { - case SCSI_PROT_DIF_TYPE0: - return IB_T10DIF_NONE; - case SCSI_PROT_DIF_TYPE1: - return IB_T10DIF_TYPE1; - case SCSI_PROT_DIF_TYPE2: - return IB_T10DIF_TYPE2; - case SCSI_PROT_DIF_TYPE3: - return IB_T10DIF_TYPE3; - default: - return IB_T10DIF_NONE; - } -} - static inline void iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs, struct ib_sig_domain *domain) { - unsigned char scsi_ptype = scsi_get_prot_type(sc); - - domain->sig.dif.type = scsi2ib_prot_type(scsi_ptype); + domain->sig_type = IB_SIG_TYPE_T10_DIF; domain->sig.dif.pi_interval = sc->device->sector_size; domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff; + /* + * At the moment we hard code those, but in the future + * we will take them from sc. 
+ */ + domain->sig.dif.apptag_check_mask = 0xffff; + domain->sig.dif.app_escape = true; + domain->sig.dif.ref_escape = true; + if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 || + scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2) + domain->sig.dif.ref_remap = true; }; static int iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) { - sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; - sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; - switch (scsi_get_prot_op(sc)) { case SCSI_PROT_WRITE_INSERT: case SCSI_PROT_READ_STRIP: - sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; + sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire); sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; break; case SCSI_PROT_READ_INSERT: case SCSI_PROT_WRITE_STRIP: - sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); + /* + * At the moment we use this modparam to tell what is + * the memory bg_type, in the future we will take it + * from sc. + */ sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM : IB_T10DIF_CRC; break; @@ -493,6 +486,11 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire); sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); + /* + * At the moment we use this modparam to tell what is + * the memory bg_type, in the future we will take it + * from sc. + */ sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM : IB_T10DIF_CRC; break; @@ -501,10 +499,10 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) scsi_get_prot_op(sc)); return -EINVAL; } + return 0; } - static int iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) { diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 73092756460b..0bea5776bcbc 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2609,51 +2609,39 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, return ret; } -static inline enum ib_t10_dif_type -se2ib_prot_type(enum target_prot_type prot_type) -{ - switch (prot_type) { - case TARGET_DIF_TYPE0_PROT: - return IB_T10DIF_NONE; - case TARGET_DIF_TYPE1_PROT: - return IB_T10DIF_TYPE1; - case TARGET_DIF_TYPE2_PROT: - return IB_T10DIF_TYPE2; - case TARGET_DIF_TYPE3_PROT: - return IB_T10DIF_TYPE3; - default: - return IB_T10DIF_NONE; - } -} - static inline void isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs, struct ib_sig_domain *domain) { - enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type); - - domain->sig.dif.type = ib_prot_type; + domain->sig_type = IB_SIG_TYPE_T10_DIF; domain->sig.dif.bg_type = IB_T10DIF_CRC; domain->sig.dif.pi_interval = se_cmd->se_dev->dev_attrib.block_size; domain->sig.dif.ref_tag = se_cmd->reftag_seed; + /* + * At the moment we hard code those, but if in the future + * the target core would like to use it, we will take it + * from se_cmd. 
+ */ + domain->sig.dif.apptag_check_mask = 0xffff; + domain->sig.dif.app_escape = true; + domain->sig.dif.ref_escape = true; + if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT || + se_cmd->prot_type == TARGET_DIF_TYPE2_PROT) + domain->sig.dif.ref_remap = true; }; static int isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) { - - sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; - sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; - switch (se_cmd->prot_op) { case TARGET_PROT_DIN_INSERT: case TARGET_PROT_DOUT_STRIP: - sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; + sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire); break; case TARGET_PROT_DOUT_INSERT: case TARGET_PROT_DIN_STRIP: - sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem); break; case TARGET_PROT_DIN_PASS: -- cgit v1.2.3 From 7ec11e0a14ae0a7a8fdce78860a70f25508de395 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Fri, 22 Aug 2014 16:57:19 +0530 Subject: RDMA/ocrdma: Add default GID at index 0 Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 256a06bc0b68..cb7b52a791db 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -388,6 +388,15 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); } +static void ocrdma_add_default_sgid(struct ocrdma_dev *dev) +{ + /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */ + union ib_gid *sgid = &dev->sgid_tbl[0]; + + sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + ocrdma_get_guid(dev, &sgid->raw[8]); +} + static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev, struct net_device *net) { @@ -434,6 +443,7 @@ static void ocrdma_init_gid_table(struct ocrdma_dev *dev) rdma_vlan_dev_real_dev(net_dev) : net_dev; if (real_dev == dev->nic_info.netdev) { + ocrdma_add_default_sgid(dev); ocrdma_init_ipv4_gids(dev, net_dev); ocrdma_init_ipv6_gids(dev, net_dev); } -- cgit v1.2.3 From bf67472ca1fac19c81367601bd4ba7d17bdc9f6e Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Fri, 22 Aug 2014 16:57:20 +0530 Subject: RDMA/ocrdma: Get vlan tag from ib_qp_attrs After IP-based GID changes, VLAN id can be obtained from qp_attr->vlan_id. 
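The shape of the fix, in outline: trust the IB_QP_VID bit in attr_mask instead of probing ah_attr->vlan_id for an in-range value that may be uninitialized garbage. A minimal sketch of the consumer side, using identifiers from the ocrdma diff below (the surrounding function is elided):

	/* Sketch: only program a VLAN when the IB core marked one valid. */
	if (attr_mask & IB_QP_VID) {
		u16 vlan_id = attrs->vlan_id;

		cmd->params.vlan_dmac_b4_to_b5 |=
			vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
		cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
	}

The old heuristic, vlan_id && (vlan_id < 0x1000), could fire on whatever happened to be sitting in ah_attr->vlan_id.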
Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index dd35ae558ae1..10a261c4ea98 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2279,7 +2279,8 @@ mbx_err: static int ocrdma_set_av_params(struct ocrdma_qp *qp, struct ocrdma_modify_qp *cmd, - struct ib_qp_attr *attrs) + struct ib_qp_attr *attrs, + int attr_mask) { int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; @@ -2319,8 +2320,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); - vlan_id = ah_attr->vlan_id; - if (vlan_id && (vlan_id < 0x1000)) { + if (attr_mask & IB_QP_VID) { + vlan_id = attrs->vlan_id; cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; @@ -2347,7 +2348,7 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID; } if (attr_mask & IB_QP_AV) { - status = ocrdma_set_av_params(qp, cmd, attrs); + status = ocrdma_set_av_params(qp, cmd, attrs, attr_mask); if (status) return status; } else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) { -- cgit v1.2.3 From 1b76d38332271d5a0468ae5e465c5ea534c473c6 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Fri, 5 Sep 2014 19:35:40 +0530 Subject: RDMA/ocrdma: Convert kernel VA to PA for mmap in user In some platforms, when iommu is enabled, the bus address returned by dma_alloc_coherent is different than the physical address. ocrdma should use physical address for mmap-ing the queue memory for the applications. This patch adds the use of virt_to_phys() at all such places where kernel buffer is mapped to user process context. Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 8f5f2577f288..ef6c78c2f6b9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -388,7 +388,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, memset(&resp, 0, sizeof(resp)); resp.ah_tbl_len = ctx->ah_tbl.len; - resp.ah_tbl_page = ctx->ah_tbl.pa; + resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va); status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len); if (status) @@ -870,7 +870,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, uresp.page_size = PAGE_ALIGN(cq->len); uresp.num_pages = 1; uresp.max_hw_cqe = cq->max_hw_cqe; - uresp.page_addr[0] = cq->pa; + uresp.page_addr[0] = virt_to_phys(cq->va); uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id); uresp.db_page_size = dev->nic_info.db_page_size; uresp.phase_change = cq->phase_change ? 
1 : 0; @@ -1123,13 +1123,13 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, uresp.sq_dbid = qp->sq.dbid; uresp.num_sq_pages = 1; uresp.sq_page_size = PAGE_ALIGN(qp->sq.len); - uresp.sq_page_addr[0] = qp->sq.pa; + uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va); uresp.num_wqe_allocated = qp->sq.max_cnt; if (!srq) { uresp.rq_dbid = qp->rq.dbid; uresp.num_rq_pages = 1; uresp.rq_page_size = PAGE_ALIGN(qp->rq.len); - uresp.rq_page_addr[0] = qp->rq.pa; + uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va); uresp.num_rqe_allocated = qp->rq.max_cnt; } uresp.db_page_addr = usr_db; @@ -1680,7 +1680,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, memset(&uresp, 0, sizeof(uresp)); uresp.rq_dbid = srq->rq.dbid; uresp.num_rq_pages = 1; - uresp.rq_page_addr[0] = srq->rq.pa; + uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va); uresp.rq_page_size = srq->rq.len; uresp.db_page_addr = dev->nic_info.unmapped_db + (srq->pd->id * dev->nic_info.db_page_size); -- cgit v1.2.3 From e5dc9409933b21732a8c370e9f7cf79c2c92cbcb Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 29 Sep 2014 14:59:43 +0800 Subject: RDMA/ocrdma: Remove an unused-label warning If IPv6 is disabled, we get the warning: drivers/infiniband/hw/ocrdma/ocrdma_main.c:650:1: warning: label ‘err_notifier6’ defined but not used [-Wunused-label] Signed-off-by: Li RongQing Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index cb7b52a791db..b0b2257b8e04 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -656,8 +656,10 @@ static int __init ocrdma_init_module(void) return 0; err_be_reg: +#if IS_ENABLED(CONFIG_IPV6) ocrdma_unregister_inet6addr_notifier(); err_notifier6: +#endif ocrdma_unregister_inetaddr_notifier(); return status; } -- cgit v1.2.3 From beb9b703811736adfc608034d1f0d5cf0c8a7073 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Sun, 5 Oct 2014 16:33:23 +0200 Subject: RDMA/ocrdma: Don't memset() buffers we just allocated with kzalloc() Get rid of the obfuscating ocrdma_alloc_mqe() kzalloc() wrapper; all it did was make it less obvious that the structure was already cleared on allocation.
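The underlying point: kzalloc() is kmalloc() plus zeroing, so a memset() immediately after it is dead code, and hiding the kzalloc() behind a wrapper makes the redundancy easy to miss. A reduced sketch of the anti-pattern (hypothetical struct name, not from the patch):

	#include <linux/slab.h>
	#include <linux/string.h>

	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);	/* already zeroed */

	if (!f)
		return -ENOMEM;
	memset(f, 0, sizeof(*f));	/* redundant; this is what gets dropped */

Conversely, where zeroing is unnecessary, plain kmalloc() is cheaper, which is why the patch below also switches the old_stats allocation from kzalloc() to kmalloc().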
Signed-off-by: Jes Sorensen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 10a261c4ea98..0ac34cb0fcae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -348,11 +348,6 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len) return mqe; } -static void *ocrdma_alloc_mqe(void) -{ - return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); -} - static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q) { dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma); @@ -1189,10 +1184,10 @@ int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset) { struct ocrdma_rdma_stats_req *req = dev->stats_mem.va; struct ocrdma_mqe *mqe = &dev->stats_mem.mqe; - struct ocrdma_rdma_stats_resp *old_stats = NULL; + struct ocrdma_rdma_stats_resp *old_stats; int status; - old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL); + old_stats = kmalloc(sizeof(*old_stats), GFP_KERNEL); if (old_stats == NULL) return -ENOMEM; @@ -1235,10 +1230,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp; struct mgmt_hba_attribs *hba_attribs; - mqe = ocrdma_alloc_mqe(); + mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); if (!mqe) return status; - memset(mqe, 0, sizeof(*mqe)); dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp); dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev, -- cgit v1.2.3 From de12348535a93535c408de396d3505541ca5e0d6 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Sun, 5 Oct 2014 16:33:24 +0200 Subject: RDMA/ocrdma: The kernel has a perfectly good BIT() macro - use it No need to re-invent the wheel here Signed-off-by: Jes Sorensen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 4 +- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 200 +++++++++++++++--------------- 2 files changed, 101 insertions(+), 103 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 0ac34cb0fcae..638bff1ffc6c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -561,8 +561,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT); cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; - cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE); - cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE); + cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE); + cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE); cmd->async_cqid_ringsize = cq->id; cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 904989ec5eaa..c5212612f37f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -28,8 +28,6 @@ #ifndef __OCRDMA_SLI_H__ #define __OCRDMA_SLI_H__ -#define Bit(_b) (1 << (_b)) - enum { OCRDMA_ASIC_GEN_SKH_R = 0x04, OCRDMA_ASIC_GEN_LANCER = 0x0B @@ -238,7 +236,7 @@ struct ocrdma_mqe_sge { enum { OCRDMA_MQE_HDR_EMB_SHIFT = 0, - OCRDMA_MQE_HDR_EMB_MASK = Bit(0), + OCRDMA_MQE_HDR_EMB_MASK = BIT(0), OCRDMA_MQE_HDR_SGE_CNT_SHIFT = 3, OCRDMA_MQE_HDR_SGE_CNT_MASK = 0x1F << 
OCRDMA_MQE_HDR_SGE_CNT_SHIFT, OCRDMA_MQE_HDR_SPECIAL_SHIFT = 24, @@ -304,7 +302,7 @@ struct ocrdma_create_eq_req { }; enum { - OCRDMA_CREATE_EQ_VALID = Bit(29), + OCRDMA_CREATE_EQ_VALID = BIT(29), OCRDMA_CREATE_EQ_CNT_SHIFT = 26, OCRDMA_CREATE_CQ_DELAY_SHIFT = 13, }; @@ -322,13 +320,13 @@ enum { OCRDMA_MCQE_ESTATUS_SHIFT = 16, OCRDMA_MCQE_ESTATUS_MASK = 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT, OCRDMA_MCQE_CONS_SHIFT = 27, - OCRDMA_MCQE_CONS_MASK = Bit(27), + OCRDMA_MCQE_CONS_MASK = BIT(27), OCRDMA_MCQE_CMPL_SHIFT = 28, - OCRDMA_MCQE_CMPL_MASK = Bit(28), + OCRDMA_MCQE_CMPL_MASK = BIT(28), OCRDMA_MCQE_AE_SHIFT = 30, - OCRDMA_MCQE_AE_MASK = Bit(30), + OCRDMA_MCQE_AE_MASK = BIT(30), OCRDMA_MCQE_VALID_SHIFT = 31, - OCRDMA_MCQE_VALID_MASK = Bit(31) + OCRDMA_MCQE_VALID_MASK = BIT(31) }; struct ocrdma_mcqe { @@ -339,13 +337,13 @@ struct ocrdma_mcqe { }; enum { - OCRDMA_AE_MCQE_QPVALID = Bit(31), + OCRDMA_AE_MCQE_QPVALID = BIT(31), OCRDMA_AE_MCQE_QPID_MASK = 0xFFFF, - OCRDMA_AE_MCQE_CQVALID = Bit(31), + OCRDMA_AE_MCQE_CQVALID = BIT(31), OCRDMA_AE_MCQE_CQID_MASK = 0xFFFF, - OCRDMA_AE_MCQE_VALID = Bit(31), - OCRDMA_AE_MCQE_AE = Bit(30), + OCRDMA_AE_MCQE_VALID = BIT(31), + OCRDMA_AE_MCQE_AE = BIT(30), OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT = 16, OCRDMA_AE_MCQE_EVENT_TYPE_MASK = 0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT, @@ -386,9 +384,9 @@ enum { OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK = 0xFF << OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT, OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT = 30, - OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = Bit(30), + OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = BIT(30), OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT = 31, - OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = Bit(31) + OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = BIT(31) }; struct ocrdma_ae_mpa_mcqe { @@ -412,9 +410,9 @@ enum { OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK = 0xFF << OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT, OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT = 30, - OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = Bit(30), + OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = BIT(30), OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT = 31, - OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = Bit(31) + OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = BIT(31) }; struct ocrdma_ae_qp_mcqe { @@ -449,9 +447,9 @@ enum OCRDMA_ASYNC_EVENT_TYPE { /* mailbox command request and responses */ enum { OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2, - OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = Bit(2), + OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = BIT(2), OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT = 3, - OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = Bit(3), + OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = BIT(3), OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT = 8, OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK = 0xFFFFFF << OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT, @@ -672,9 +670,9 @@ enum { OCRDMA_CREATE_CQ_PAGE_SIZE_MASK = 0xFF, OCRDMA_CREATE_CQ_COALESCWM_SHIFT = 12, - OCRDMA_CREATE_CQ_COALESCWM_MASK = Bit(13) | Bit(12), - OCRDMA_CREATE_CQ_FLAGS_NODELAY = Bit(14), - OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = Bit(15), + OCRDMA_CREATE_CQ_COALESCWM_MASK = BIT(13) | BIT(12), + OCRDMA_CREATE_CQ_FLAGS_NODELAY = BIT(14), + OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = BIT(15), OCRDMA_CREATE_CQ_EQ_ID_MASK = 0xFFFF, OCRDMA_CREATE_CQ_CQE_COUNT_MASK = 0xFFFF @@ -687,8 +685,8 @@ enum { OCRDMA_CREATE_CQ_EQID_SHIFT = 22, OCRDMA_CREATE_CQ_CNT_SHIFT = 27, - OCRDMA_CREATE_CQ_FLAGS_VALID = Bit(29), - OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = Bit(31), + OCRDMA_CREATE_CQ_FLAGS_VALID = BIT(29), + OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = BIT(31), OCRDMA_CREATE_CQ_DEF_FLAGS = OCRDMA_CREATE_CQ_FLAGS_VALID | OCRDMA_CREATE_CQ_FLAGS_EVENTABLE | OCRDMA_CREATE_CQ_FLAGS_NODELAY @@ -731,8 +729,8 @@ enum { 
OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22, OCRDMA_CREATE_MQ_CQ_ID_SHIFT = 16, OCRDMA_CREATE_MQ_RING_SIZE_SHIFT = 16, - OCRDMA_CREATE_MQ_VALID = Bit(31), - OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0) + OCRDMA_CREATE_MQ_VALID = BIT(31), + OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = BIT(0) }; struct ocrdma_create_mq_req { @@ -783,7 +781,7 @@ enum { OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT = 16, OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT = 19, OCRDMA_CREATE_QP_REQ_QPT_SHIFT = 29, - OCRDMA_CREATE_QP_REQ_QPT_MASK = Bit(31) | Bit(30) | Bit(29), + OCRDMA_CREATE_QP_REQ_QPT_MASK = BIT(31) | BIT(30) | BIT(29), OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT = 0, OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK = 0xFFFF, @@ -798,23 +796,23 @@ enum { OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT, OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT = 0, - OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = Bit(0), + OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = BIT(0), OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT = 1, - OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = Bit(1), + OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = BIT(1), OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT = 2, - OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = Bit(2), + OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = BIT(2), OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT = 3, - OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = Bit(3), + OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = BIT(3), OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT = 4, - OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = Bit(4), + OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = BIT(4), OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT = 5, - OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = Bit(5), + OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = BIT(5), OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT = 6, - OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = Bit(6), + OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = BIT(6), OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT = 7, - OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = Bit(7), + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = BIT(7), OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT = 8, - OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = Bit(8), + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = BIT(8), OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT = 16, OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK = 0xFFFF << OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT, @@ -927,7 +925,7 @@ enum { OCRDMA_CREATE_QP_RSP_SQ_ID_MASK = 0xFFFF << OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT, - OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = Bit(0), + OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = BIT(0), OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT = 1, OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK = 0x7FFF << OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT, @@ -964,38 +962,38 @@ enum { OCRDMA_MODIFY_QP_ID_SHIFT = 0, OCRDMA_MODIFY_QP_ID_MASK = 0xFFFF, - OCRDMA_QP_PARA_QPS_VALID = Bit(0), - OCRDMA_QP_PARA_SQD_ASYNC_VALID = Bit(1), - OCRDMA_QP_PARA_PKEY_VALID = Bit(2), - OCRDMA_QP_PARA_QKEY_VALID = Bit(3), - OCRDMA_QP_PARA_PMTU_VALID = Bit(4), - OCRDMA_QP_PARA_ACK_TO_VALID = Bit(5), - OCRDMA_QP_PARA_RETRY_CNT_VALID = Bit(6), - OCRDMA_QP_PARA_RRC_VALID = Bit(7), - OCRDMA_QP_PARA_RQPSN_VALID = Bit(8), - OCRDMA_QP_PARA_MAX_IRD_VALID = Bit(9), - OCRDMA_QP_PARA_MAX_ORD_VALID = Bit(10), - OCRDMA_QP_PARA_RNT_VALID = Bit(11), - OCRDMA_QP_PARA_SQPSN_VALID = Bit(12), - OCRDMA_QP_PARA_DST_QPN_VALID = Bit(13), - OCRDMA_QP_PARA_MAX_WQE_VALID = Bit(14), - OCRDMA_QP_PARA_MAX_RQE_VALID = Bit(15), - OCRDMA_QP_PARA_SGE_SEND_VALID = Bit(16), - OCRDMA_QP_PARA_SGE_RECV_VALID = Bit(17), - OCRDMA_QP_PARA_SGE_WR_VALID = Bit(18), - OCRDMA_QP_PARA_INB_RDEN_VALID = Bit(19), - OCRDMA_QP_PARA_INB_WREN_VALID = Bit(20), - OCRDMA_QP_PARA_FLOW_LBL_VALID = Bit(21), - OCRDMA_QP_PARA_BIND_EN_VALID = Bit(22), - OCRDMA_QP_PARA_ZLKEY_EN_VALID = 
Bit(23), - OCRDMA_QP_PARA_FMR_EN_VALID = Bit(24), - OCRDMA_QP_PARA_INBAT_EN_VALID = Bit(25), - OCRDMA_QP_PARA_VLAN_EN_VALID = Bit(26), - - OCRDMA_MODIFY_QP_FLAGS_RD = Bit(0), - OCRDMA_MODIFY_QP_FLAGS_WR = Bit(1), - OCRDMA_MODIFY_QP_FLAGS_SEND = Bit(2), - OCRDMA_MODIFY_QP_FLAGS_ATOMIC = Bit(3) + OCRDMA_QP_PARA_QPS_VALID = BIT(0), + OCRDMA_QP_PARA_SQD_ASYNC_VALID = BIT(1), + OCRDMA_QP_PARA_PKEY_VALID = BIT(2), + OCRDMA_QP_PARA_QKEY_VALID = BIT(3), + OCRDMA_QP_PARA_PMTU_VALID = BIT(4), + OCRDMA_QP_PARA_ACK_TO_VALID = BIT(5), + OCRDMA_QP_PARA_RETRY_CNT_VALID = BIT(6), + OCRDMA_QP_PARA_RRC_VALID = BIT(7), + OCRDMA_QP_PARA_RQPSN_VALID = BIT(8), + OCRDMA_QP_PARA_MAX_IRD_VALID = BIT(9), + OCRDMA_QP_PARA_MAX_ORD_VALID = BIT(10), + OCRDMA_QP_PARA_RNT_VALID = BIT(11), + OCRDMA_QP_PARA_SQPSN_VALID = BIT(12), + OCRDMA_QP_PARA_DST_QPN_VALID = BIT(13), + OCRDMA_QP_PARA_MAX_WQE_VALID = BIT(14), + OCRDMA_QP_PARA_MAX_RQE_VALID = BIT(15), + OCRDMA_QP_PARA_SGE_SEND_VALID = BIT(16), + OCRDMA_QP_PARA_SGE_RECV_VALID = BIT(17), + OCRDMA_QP_PARA_SGE_WR_VALID = BIT(18), + OCRDMA_QP_PARA_INB_RDEN_VALID = BIT(19), + OCRDMA_QP_PARA_INB_WREN_VALID = BIT(20), + OCRDMA_QP_PARA_FLOW_LBL_VALID = BIT(21), + OCRDMA_QP_PARA_BIND_EN_VALID = BIT(22), + OCRDMA_QP_PARA_ZLKEY_EN_VALID = BIT(23), + OCRDMA_QP_PARA_FMR_EN_VALID = BIT(24), + OCRDMA_QP_PARA_INBAT_EN_VALID = BIT(25), + OCRDMA_QP_PARA_VLAN_EN_VALID = BIT(26), + + OCRDMA_MODIFY_QP_FLAGS_RD = BIT(0), + OCRDMA_MODIFY_QP_FLAGS_WR = BIT(1), + OCRDMA_MODIFY_QP_FLAGS_SEND = BIT(2), + OCRDMA_MODIFY_QP_FLAGS_ATOMIC = BIT(3) }; enum { @@ -1014,15 +1012,15 @@ enum { OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK = 0xFFFF << OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT, - OCRDMA_QP_PARAMS_FLAGS_FMR_EN = Bit(0), - OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = Bit(1), - OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = Bit(2), - OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = Bit(3), - OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = Bit(4), + OCRDMA_QP_PARAMS_FLAGS_FMR_EN = BIT(0), + OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = BIT(1), + OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = BIT(2), + OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = BIT(3), + OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = BIT(4), OCRDMA_QP_PARAMS_STATE_SHIFT = 5, - OCRDMA_QP_PARAMS_STATE_MASK = Bit(5) | Bit(6) | Bit(7), - OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = Bit(8), - OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = Bit(9), + OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7), + OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8), + OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9), OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16, OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF << OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT, @@ -1277,7 +1275,7 @@ struct ocrdma_alloc_pd { }; enum { - OCRDMA_ALLOC_PD_RSP_DPP = Bit(16), + OCRDMA_ALLOC_PD_RSP_DPP = BIT(16), OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT = 20, OCRDMA_ALLOC_PD_RSP_PDID_MASK = 0xFFFF, }; @@ -1309,18 +1307,18 @@ enum { OCRDMA_ALLOC_LKEY_PD_ID_MASK = 0xFFFF, OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT = 0, - OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = Bit(0), + OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = BIT(0), OCRDMA_ALLOC_LKEY_FMR_SHIFT = 1, - OCRDMA_ALLOC_LKEY_FMR_MASK = Bit(1), + OCRDMA_ALLOC_LKEY_FMR_MASK = BIT(1), OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT = 2, - OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = Bit(2), + OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = BIT(2), OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT = 3, - OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = Bit(3), + OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = BIT(3), OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT = 4, - OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = Bit(4), + OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = BIT(4), OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT = 5, - 
OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = Bit(5), - OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = Bit(6), + OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = BIT(5), + OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = BIT(6), OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT = 6, OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT = 16, OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK = 0xFFFF << @@ -1379,21 +1377,21 @@ enum { OCRDMA_REG_NSMR_HPAGE_SIZE_MASK = 0xFF << OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT, OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT = 24, - OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = Bit(24), + OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = BIT(24), OCRDMA_REG_NSMR_ZB_SHIFT = 25, - OCRDMA_REG_NSMR_ZB_SHIFT_MASK = Bit(25), + OCRDMA_REG_NSMR_ZB_SHIFT_MASK = BIT(25), OCRDMA_REG_NSMR_REMOTE_INV_SHIFT = 26, - OCRDMA_REG_NSMR_REMOTE_INV_MASK = Bit(26), + OCRDMA_REG_NSMR_REMOTE_INV_MASK = BIT(26), OCRDMA_REG_NSMR_REMOTE_WR_SHIFT = 27, - OCRDMA_REG_NSMR_REMOTE_WR_MASK = Bit(27), + OCRDMA_REG_NSMR_REMOTE_WR_MASK = BIT(27), OCRDMA_REG_NSMR_REMOTE_RD_SHIFT = 28, - OCRDMA_REG_NSMR_REMOTE_RD_MASK = Bit(28), + OCRDMA_REG_NSMR_REMOTE_RD_MASK = BIT(28), OCRDMA_REG_NSMR_LOCAL_WR_SHIFT = 29, - OCRDMA_REG_NSMR_LOCAL_WR_MASK = Bit(29), + OCRDMA_REG_NSMR_LOCAL_WR_MASK = BIT(29), OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT = 30, - OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = Bit(30), + OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = BIT(30), OCRDMA_REG_NSMR_LAST_SHIFT = 31, - OCRDMA_REG_NSMR_LAST_MASK = Bit(31) + OCRDMA_REG_NSMR_LAST_MASK = BIT(31) }; struct ocrdma_reg_nsmr { @@ -1420,7 +1418,7 @@ enum { OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT, OCRDMA_REG_NSMR_CONT_LAST_SHIFT = 31, - OCRDMA_REG_NSMR_CONT_LAST_MASK = Bit(31) + OCRDMA_REG_NSMR_CONT_LAST_MASK = BIT(31) }; struct ocrdma_reg_nsmr_cont { @@ -1566,7 +1564,7 @@ struct ocrdma_delete_ah_tbl_rsp { enum { OCRDMA_EQE_VALID_SHIFT = 0, - OCRDMA_EQE_VALID_MASK = Bit(0), + OCRDMA_EQE_VALID_MASK = BIT(0), OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE, OCRDMA_EQE_RESOURCE_ID_SHIFT = 16, OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF << @@ -1624,11 +1622,11 @@ enum { OCRDMA_CQE_UD_STATUS_MASK = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT, OCRDMA_CQE_STATUS_SHIFT = 16, OCRDMA_CQE_STATUS_MASK = 0xFF << OCRDMA_CQE_STATUS_SHIFT, - OCRDMA_CQE_VALID = Bit(31), - OCRDMA_CQE_INVALIDATE = Bit(30), - OCRDMA_CQE_QTYPE = Bit(29), - OCRDMA_CQE_IMM = Bit(28), - OCRDMA_CQE_WRITE_IMM = Bit(27), + OCRDMA_CQE_VALID = BIT(31), + OCRDMA_CQE_INVALIDATE = BIT(30), + OCRDMA_CQE_QTYPE = BIT(29), + OCRDMA_CQE_IMM = BIT(28), + OCRDMA_CQE_WRITE_IMM = BIT(27), OCRDMA_CQE_QTYPE_SQ = 0, OCRDMA_CQE_QTYPE_RQ = 1, OCRDMA_CQE_SRCQP_MASK = 0xFFFFFF @@ -1772,8 +1770,8 @@ struct ocrdma_grh { u16 rsvd; } __packed; -#define OCRDMA_AV_VALID Bit(7) -#define OCRDMA_AV_VLAN_VALID Bit(1) +#define OCRDMA_AV_VALID BIT(7) +#define OCRDMA_AV_VLAN_VALID BIT(1) struct ocrdma_av { struct ocrdma_eth_vlan eth_hdr; -- cgit v1.2.3 From 05df78059b3b991036666de9ee124d5cbd9e375d Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Sun, 5 Oct 2014 16:33:25 +0200 Subject: RDMA/ocrdma: Save the bit environment, spare unnecessary parentheses Parentheses around constants serve no purpose, save the bits!
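For reference, the generic macro that the BIT() conversion above relies on lives in include/linux/bitops.h in kernels of this era, and it already supplies its own parentheses, which is also why wrapping bare constants in parentheses buys nothing:

	/* include/linux/bitops.h (circa 3.18): */
	#define BIT(nr)		(1UL << (nr))

	/* The driver-private duplicate removed above: */
	#define Bit(_b)		(1 << (_b))

	/* For a bare constant, the parentheses are pure noise: */
	#define OCRDMA_MAX_SGID	8	/* after  */
	/* #define OCRDMA_MAX_SGID	(8)	-- before */

One real difference is worth noting: BIT() shifts an unsigned long, so BIT(31) sidesteps the signed-overflow trap of 1 << 31 on a 32-bit int.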
Signed-off-by: Jes Sorensen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 38 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index c5212612f37f..4e036480c1a8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -101,7 +101,7 @@ enum { QTYPE_MCCQ = 3 }; -#define OCRDMA_MAX_SGID (8) +#define OCRDMA_MAX_SGID 8 #define OCRDMA_MAX_QP 2048 #define OCRDMA_MAX_CQ 2048 @@ -126,33 +126,33 @@ enum { #define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */ /* qid #2 msbits at 12-11 */ #define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT 0x1 -#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */ +#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT 16 /* bits 16 - 28 */ /* Rearm bit */ -#define OCRDMA_DB_CQ_REARM_SHIFT (29) /* bit 29 */ +#define OCRDMA_DB_CQ_REARM_SHIFT 29 /* bit 29 */ /* solicited bit */ -#define OCRDMA_DB_CQ_SOLICIT_SHIFT (31) /* bit 31 */ +#define OCRDMA_DB_CQ_SOLICIT_SHIFT 31 /* bit 31 */ #define OCRDMA_EQ_ID_MASK 0x1FF /* bits 0 - 8 */ #define OCRDMA_EQ_ID_EXT_MASK 0x3e00 /* bits 9-13 */ -#define OCRDMA_EQ_ID_EXT_MASK_SHIFT (2) /* qid bits 9-13 at 11-15 */ +#define OCRDMA_EQ_ID_EXT_MASK_SHIFT 2 /* qid bits 9-13 at 11-15 */ /* Clear the interrupt for this eq */ -#define OCRDMA_EQ_CLR_SHIFT (9) /* bit 9 */ +#define OCRDMA_EQ_CLR_SHIFT 9 /* bit 9 */ /* Must be 1 */ -#define OCRDMA_EQ_TYPE_SHIFT (10) /* bit 10 */ +#define OCRDMA_EQ_TYPE_SHIFT 10 /* bit 10 */ /* Number of event entries processed */ -#define OCRDMA_NUM_EQE_SHIFT (16) /* bits 16 - 28 */ +#define OCRDMA_NUM_EQE_SHIFT 16 /* bits 16 - 28 */ /* Rearm bit */ -#define OCRDMA_REARM_SHIFT (29) /* bit 29 */ +#define OCRDMA_REARM_SHIFT 29 /* bit 29 */ #define OCRDMA_MQ_ID_MASK 0x7FF /* bits 0 - 10 */ /* Number of entries posted */ -#define OCRDMA_MQ_NUM_MQE_SHIFT (16) /* bits 16 - 29 */ +#define OCRDMA_MQ_NUM_MQE_SHIFT 16 /* bits 16 - 29 */ -#define OCRDMA_MIN_HPAGE_SIZE (4096) +#define OCRDMA_MIN_HPAGE_SIZE 4096 -#define OCRDMA_MIN_Q_PAGE_SIZE (4096) -#define OCRDMA_MAX_Q_PAGES (8) +#define OCRDMA_MIN_Q_PAGE_SIZE 4096 +#define OCRDMA_MAX_Q_PAGES 8 #define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C #define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF @@ -168,14 +168,14 @@ enum { # 6: 256K Bytes # 7: 512K Bytes */ -#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8) +#define OCRDMA_MAX_Q_PAGE_SIZE_CNT 8 #define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES) -#define MAX_OCRDMA_QP_PAGES (8) +#define MAX_OCRDMA_QP_PAGES 8 #define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HQ_PAGE_SIZE) -#define OCRDMA_CREATE_CQ_MAX_PAGES (4) -#define OCRDMA_DPP_CQE_SIZE (4) +#define OCRDMA_CREATE_CQ_MAX_PAGES 4 +#define OCRDMA_DPP_CQE_SIZE 4 #define OCRDMA_GEN2_MAX_CQE 1024 #define OCRDMA_GEN2_CQ_PAGE_SIZE 4096 @@ -290,7 +290,7 @@ struct ocrdma_pa { u32 hi; }; -#define MAX_OCRDMA_EQ_PAGES (8) +#define MAX_OCRDMA_EQ_PAGES 8 struct ocrdma_create_eq_req { struct ocrdma_mbx_hdr req; u32 num_pages; @@ -312,7 +312,7 @@ struct ocrdma_create_eq_rsp { u32 vector_eqid; }; -#define OCRDMA_EQ_MINOR_OTHER (0x1) +#define OCRDMA_EQ_MINOR_OTHER 0x1 enum { OCRDMA_MCQE_STATUS_SHIFT = 0, -- cgit v1.2.3 From 8b0f93d9490653a7b9fc91f3570089132faed1c0 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Fri, 26 Sep 2014 20:45:32 +0530 Subject: IB/core: Clear AH attr variable to prevent garbage data During create-ah from userspace, 
uverbs is sending garbage data in attr.dmac and attr.vlan_id. This patch sets attr.dmac and attr.vlan_id to zero. Fixes: dd5f03beb4f7 ("IB/core: Ethernet L2 attributes in verbs/cm structures") Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 0600c50e6215..5ba2a86aab6a 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2518,6 +2518,8 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, attr.grh.sgid_index = cmd.attr.grh.sgid_index; attr.grh.hop_limit = cmd.attr.grh.hop_limit; attr.grh.traffic_class = cmd.attr.grh.traffic_class; + attr.vlan_id = 0; + memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); ah = ib_create_ah(pd, &attr); -- cgit v1.2.3 From a040f95dc81986d7d55bd98a9455217522ef706d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 23 Sep 2014 12:38:26 +0300 Subject: IB/core: Fix XRC race condition in ib_uverbs_open_qp In ib_uverbs_open_qp, the sharable xrc target qp is created as a "pseudo" qp and added to a list of qp's sharing the same physical QP. This is done before the "pseudo" qp is assigned a uobject. There is a race condition here if an async event arrives at the physical qp. If the event is handled after the pseudo qp is added to the list, but before it is assigned a uobject, the kernel crashes in ib_uverbs_qp_event_handler, due to trying to dereference a NULL uobject pointer. Note that simply checking for non-NULL is not enough, due to error flows in ib_uverbs_open_qp. If the failure is after assigning the uobject, but before the qp has fully been created, we still have a problem. Thus, in ib_uverbs_qp_event_handler, we test that the uobject is present, and also that it is live. Reported-by: Matthew Finlay Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index c73b22a257fe..bb6fea12ce31 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -502,6 +502,10 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; + /* for XRC target qp's, check that qp is live */ + if (!event->element.qp->uobject || !event->element.qp->uobject->live) + return; + uobj = container_of(event->element.qp->uobject, struct ib_uevent_object, uobject); -- cgit v1.2.3 From 65d4c01af0f5b394a33848bc27c4331e2959dfd5 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 29 Aug 2014 11:19:29 -0500 Subject: RDMA/cxgb4: Make c4iw_wr_log_size_order static This fixes a sparse warning. 
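Sparse complains about any non-static symbol that has no declaration in a header, on the theory that a symbol used in only one translation unit should be static. A reduced reproduction of the warning class (illustrative; the message is quoted from memory of sparse's usual format):

	/*
	 * device.c -- built with `make C=1`, sparse reports roughly:
	 *   warning: symbol 'c4iw_wr_log_size_order' was not declared.
	 *   Should it be static?
	 */
	static int c4iw_wr_log_size_order = 12;	/* static silences it */

Making the variable static also keeps it out of the kernel's global symbol namespace; module_param() works the same on a static variable.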
Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index f25df5276c22..72f1f052e88c 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -60,7 +60,7 @@ int c4iw_wr_log = 0; module_param(c4iw_wr_log, int, 0444); MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data."); -int c4iw_wr_log_size_order = 12; +static int c4iw_wr_log_size_order = 12; module_param(c4iw_wr_log_size_order, int, 0444); MODULE_PARM_DESC(c4iw_wr_log_size_order, "Number of entries (log2) in the work request timing log."); -- cgit v1.2.3 From 04524a47c36bc7fc573a4856b4e0fd8cf66c3126 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 24 Sep 2014 03:53:41 +0530 Subject: RDMA/cxgb4: Take IPv6 into account for best_mtu and set_emss best_mtu and set_emss were not considering ipv6 header for ipv6 case. Signed-off-by: Hariprasad Shenai Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c2fb71c182a8..2ca9ec8d6474 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -236,10 +236,12 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) static void set_emss(struct c4iw_ep *ep, u16 opt) { ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - - sizeof(struct iphdr) - sizeof(struct tcphdr); + ((AF_INET == ep->com.remote_addr.ss_family) ? + sizeof(struct iphdr) : sizeof(struct ipv6hdr)) - + sizeof(struct tcphdr); ep->mss = ep->emss; if (GET_TCPOPT_TSTAMP(opt)) - ep->emss -= 12; + ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4); if (ep->emss < 128) ep->emss = 128; if (ep->emss & 7) @@ -581,11 +583,14 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep, } static void best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts) + unsigned int *idx, int use_ts, int ipv6) { - unsigned short hdr_size = sizeof(struct iphdr) + + unsigned short hdr_size = (ipv6 ? + sizeof(struct ipv6hdr) : + sizeof(struct iphdr)) + sizeof(struct tcphdr) + - (use_ts ? 12 : 0); + (use_ts ? + round_up(TCPOLEN_TIMESTAMP, 4) : 0); unsigned short data_size = mtu - hdr_size; cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); @@ -634,7 +639,8 @@ static int send_connect(struct c4iw_ep *ep) set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps); + enable_tcp_timestamps, + (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); wscale = compute_wscale(rcv_win); /* @@ -1763,7 +1769,8 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps); + enable_tcp_timestamps, + (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); wscale = compute_wscale(rcv_win); /* @@ -2162,7 +2169,8 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, ep->hwtid)); best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps && req->tcpopt.tstamp); + enable_tcp_timestamps && req->tcpopt.tstamp, + (AF_INET == ep->com.remote_addr.ss_family) ? 
0 : 1); wscale = compute_wscale(rcv_win); /* -- cgit v1.2.3 From d480201b2261d26b4ca1fa4cc2bfc6cb644733a3 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 24 Sep 2014 03:53:42 +0530 Subject: RDMA/cxgb4: Add missing neigh_release in find_route Signed-off-by: Hariprasad Shenai Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 2ca9ec8d6474..2ee9892850ba 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -417,6 +417,7 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, return NULL; if (!our_interface(dev, n->dev) && !(n->dev->flags & IFF_LOOPBACK)) { + neigh_release(n); dst_release(&rt->dst); return NULL; } -- cgit v1.2.3 From da22b896b119001b0cd9eb9ecb5b3fd85e948e43 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 24 Sep 2014 03:53:43 +0530 Subject: RDMA/cxgb4: Fix ntuple calculation for ipv6 and remove duplicate line This fixes the ntuple calculation for IPv6 active open requests on the T5 adapter. It also removes a duplicate line that was added in commit 92e7ae71726c ("iw_cxgb4: Choose appropriate hw mtu index and ISS for iWARP connections") Signed-off-by: Hariprasad Shenai Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 2ee9892850ba..fb61f6685809 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -675,6 +675,7 @@ static int send_connect(struct c4iw_ep *ep) if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { opt2 |= T5_OPT_2_VALID; opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ } t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); @@ -720,8 +721,6 @@ static int send_connect(struct c4iw_ep *ep) } else { u32 isn = (prandom_u32() & ~7UL) - 1; - opt2 |= T5_OPT_2_VALID; - opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ if (peer2peer) isn += 4; @@ -763,10 +762,10 @@ static int send_connect(struct c4iw_ep *ep) t5_req6->peer_ip_lo = *((__be64 *) (ra6->sin6_addr.s6_addr + 8)); t5_req6->opt0 = cpu_to_be64(opt0); - t5_req6->params = (__force __be64)cpu_to_be32( + t5_req6->params = cpu_to_be64(V_FILTER_TUPLE( cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], - ep->l2t)); + ep->l2t))); t5_req6->rsvd = cpu_to_be32(isn); PDBG("%s snd_isn %u\n", __func__, be32_to_cpu(t5_req6->rsvd)); -- cgit v1.2.3
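A closing note on the neigh_release fix above: the neighbour entry comes back from the lookup with an elevated refcount (assuming dst_neigh_lookup() or an equivalent referenced lookup, as find_route used in this era), so every early return that abandons the route must drop both the neighbour and the dst references. The balanced shape, sketched:

	struct neighbour *n = dst_neigh_lookup(&rt->dst, &peer_ip);

	if (!n)
		return NULL;
	if (!our_interface(dev, n->dev) &&
	    !(n->dev->flags & IFF_LOOPBACK)) {
		neigh_release(n);	/* the line the patch adds */
		dst_release(&rt->dst);
		return NULL;
	}
	neigh_release(n);	/* success path drops its ref before returning */
	return &rt->dst;

Without the added neigh_release(), each route rejected for being on a foreign interface leaked one neighbour reference.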