From 40753caa364bfba60ebd5e2a8bdf366ef175d03c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:53:56 -0500 Subject: [SCSI] iscsi class, iscsi_tcp/iser: add host arg to session creation iscsi offload (bnx2i and qla4xx) allocate a scsi host per hba, so the session creation path needs a shost/host_no argument. Software iscsi/iser will follow the same behavior as before where it allocates a host per session, but in the future iser will probably look more like bnx2i where the host's parent is the hardware (rnic for iser and for bnx2i it is the nic), because it does not use a socket layer like how iscsi_tcp does. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index aeb58cae9a3f..efc121986c50 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -368,6 +368,7 @@ static struct iscsi_transport iscsi_iser_transport; static struct iscsi_cls_session * iscsi_iser_session_create(struct iscsi_transport *iscsit, struct scsi_transport_template *scsit, + struct Scsi_Host *shost, uint16_t cmds_max, uint16_t qdepth, uint32_t initial_cmdsn, uint32_t *hostno) { -- cgit v1.2.3 From d3826721b198001c55353b1c54e10843068aae63 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:53:57 -0500 Subject: [SCSI] iscsi class, iscsi drivers: remove unused iscsi_transport attrs max_cmd_len and max_conn are not really used. max_cmd_len is always 16 and can be set by the LLD. max_conn is always one since we do not support MCS. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 1 - drivers/infiniband/ulp/iser/iscsi_iser.h | 1 - 2 files changed, 2 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index efc121986c50..32f5d5e79abf 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -592,7 +592,6 @@ static struct iscsi_transport iscsi_iser_transport = { .host_template = &iscsi_iser_sht, .conndata_size = sizeof(struct iscsi_conn), .max_lun = ISCSI_ISER_MAX_LUN, - .max_cmd_len = ISCSI_ISER_MAX_CMD_LEN, /* session management */ .create_session = iscsi_iser_session_create, .destroy_session = iscsi_session_teardown, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a8c1b300e34d..66a2f30ada01 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -96,7 +96,6 @@ /* support upto 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) #define ISCSI_ISER_MAX_LUN 256 -#define ISCSI_ISER_MAX_CMD_LEN 16 /* QP settings */ /* Maximal bounds on received asynchronous PDUs */ -- cgit v1.2.3 From 756135215ec743be6fdce2bdebe8cdb9f8a231f6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:53:59 -0500 Subject: [SCSI] iscsi: remove session and host binding in libiscsi bnx2i allocates a host per netdevice but will use libiscsi, so this unbinds the session from the host in that code. This will also be useful for the iser parent device dma settings fixes. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 74 +++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 32f5d5e79abf..5a750042e2b2 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -74,6 +74,10 @@ #include "iscsi_iser.h" +static struct scsi_host_template iscsi_iser_sht; +static struct iscsi_transport iscsi_iser_transport; +static struct scsi_transport_template *iscsi_iser_scsi_transport; + static unsigned int iscsi_max_lun = 512; module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); @@ -363,40 +367,64 @@ iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) return iscsi_conn_start(cls_conn); } -static struct iscsi_transport iscsi_iser_transport; +static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) +{ + struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); + + iscsi_session_teardown(cls_session); + scsi_remove_host(shost); + iscsi_host_teardown(shost); + scsi_host_put(shost); +} static struct iscsi_cls_session * -iscsi_iser_session_create(struct iscsi_transport *iscsit, - struct scsi_transport_template *scsit, - struct Scsi_Host *shost, - uint16_t cmds_max, uint16_t qdepth, - uint32_t initial_cmdsn, uint32_t *hostno) +iscsi_iser_session_create(struct Scsi_Host *shost, + uint16_t cmds_max, uint16_t qdepth, + uint32_t initial_cmdsn, uint32_t *hostno) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; int i; - uint32_t hn; struct iscsi_cmd_task *ctask; struct iscsi_mgmt_task *mtask; struct iscsi_iser_cmd_task *iser_ctask; struct iser_desc *desc; + if (shost) { + printk(KERN_ERR "iscsi_tcp: invalid shost %d.\n", + shost->host_no); + return NULL; + } + + shost = scsi_host_alloc(&iscsi_iser_sht, 0); + if (!shost) + return NULL; + 
shost->transportt = iscsi_iser_scsi_transport; + shost->max_lun = iscsi_max_lun; + shost->max_id = 0; + shost->max_channel = 0; + shost->max_cmd_len = 16; + + iscsi_host_setup(shost, qdepth); + + if (scsi_add_host(shost, NULL)) + goto free_host; + *hostno = shost->host_no; + /* * we do not support setting can_queue cmd_per_lun from userspace yet * because we preallocate so many resources */ - cls_session = iscsi_session_setup(iscsit, scsit, + cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, ISCSI_DEF_XMIT_CMDS_MAX, - ISCSI_MAX_CMD_PER_LUN, sizeof(struct iscsi_iser_cmd_task), sizeof(struct iser_desc), - initial_cmdsn, &hn); + initial_cmdsn); if (!cls_session) - return NULL; - - *hostno = hn; - session = class_to_transport_session(cls_session); + goto remove_host; + session = cls_session->dd_data; + shost->can_queue = session->cmds_max; /* libiscsi setup itts, data and pool so just set desc fields */ for (i = 0; i < session->cmds_max; i++) { ctask = session->cmds[i]; @@ -413,6 +441,13 @@ iscsi_iser_session_create(struct iscsi_transport *iscsit, } return cls_session; + +remove_host: + scsi_remove_host(shost); +free_host: + iscsi_host_teardown(shost); + scsi_host_put(shost); + return NULL; } static int @@ -589,12 +624,11 @@ static struct iscsi_transport iscsi_iser_transport = { .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_NETDEV_NAME | ISCSI_HOST_INITIATOR_NAME, - .host_template = &iscsi_iser_sht, .conndata_size = sizeof(struct iscsi_conn), - .max_lun = ISCSI_ISER_MAX_LUN, + .sessiondata_size = sizeof(struct iscsi_session), /* session management */ .create_session = iscsi_iser_session_create, - .destroy_session = iscsi_session_teardown, + .destroy_session = iscsi_iser_session_destroy, /* connection management */ .create_conn = iscsi_iser_conn_create, .bind_conn = iscsi_iser_conn_bind, @@ -633,8 +667,6 @@ static int __init iser_init(void) return -EINVAL; } - iscsi_iser_transport.max_lun = iscsi_max_lun; - memset(&ig, 0, sizeof(struct 
iser_global)); ig.desc_cache = kmem_cache_create("iser_descriptors", @@ -650,7 +682,9 @@ static int __init iser_init(void) mutex_init(&ig.connlist_mutex); INIT_LIST_HEAD(&ig.connlist); - if (!iscsi_register_transport(&iscsi_iser_transport)) { + iscsi_iser_scsi_transport = iscsi_register_transport( + &iscsi_iser_transport); + if (!iscsi_iser_scsi_transport) { iser_err("iscsi_register_transport failed\n"); err = -EINVAL; goto register_transport_failure; -- cgit v1.2.3 From a4804cd6eb19318ae8d08ea967cfeaaf5c5b68a6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:00 -0500 Subject: [SCSI] iscsi: add iscsi host helpers This finishes the host/session unbinding, by adding some helpers to add and remove hosts and the session they manage. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 5a750042e2b2..62e35e503e49 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -371,10 +371,8 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); - iscsi_session_teardown(cls_session); - scsi_remove_host(shost); - iscsi_host_teardown(shost); - scsi_host_put(shost); + iscsi_host_remove(shost); + iscsi_host_free(shost); } static struct iscsi_cls_session * @@ -396,7 +394,7 @@ iscsi_iser_session_create(struct Scsi_Host *shost, return NULL; } - shost = scsi_host_alloc(&iscsi_iser_sht, 0); + shost = iscsi_host_alloc(&iscsi_iser_sht, 0, ISCSI_MAX_CMD_PER_LUN); if (!shost) return NULL; shost->transportt = iscsi_iser_scsi_transport; @@ -405,9 +403,7 @@ iscsi_iser_session_create(struct Scsi_Host *shost, shost->max_channel = 0; shost->max_cmd_len = 16; - 
iscsi_host_setup(shost, qdepth); - - if (scsi_add_host(shost, NULL)) + if (iscsi_host_add(shost, NULL)) goto free_host; *hostno = shost->host_no; @@ -443,10 +439,9 @@ iscsi_iser_session_create(struct Scsi_Host *shost, return cls_session; remove_host: - scsi_remove_host(shost); + iscsi_host_remove(shost); free_host: - iscsi_host_teardown(shost); - scsi_host_put(shost); + iscsi_host_free(shost); return NULL; } -- cgit v1.2.3 From 5d91e209fb21fb9cc765729d4c6a85a9fb6c9187 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:01 -0500 Subject: [SCSI] iscsi: remove session/conn_data_size from iscsi_transport This removes the session and conn data_size fields from the iscsi_transport. Just pass in the value like with host allocation. This patch also makes it so the LLD iscsi_conn data is allocated with the iscsi_cls_conn. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 62e35e503e49..9b34946eb00d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -279,7 +279,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) struct iscsi_cls_conn *cls_conn; struct iscsi_iser_conn *iser_conn; - cls_conn = iscsi_conn_setup(cls_session, conn_idx); + cls_conn = iscsi_conn_setup(cls_session, sizeof(*iser_conn), conn_idx); if (!cls_conn) return NULL; conn = cls_conn->dd_data; @@ -290,10 +290,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) */ conn->max_recv_dlength = 128; - iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL); - if (!iser_conn) - goto conn_alloc_fail; - + iser_conn = conn->dd_data; /* currently this is the only field which need to be initiated */ 
rwlock_init(&iser_conn->lock); @@ -301,10 +298,6 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) iser_conn->iscsi_conn = conn; return cls_conn; - -conn_alloc_fail: - iscsi_conn_teardown(cls_conn); - return NULL; } static void @@ -313,10 +306,9 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_iser_conn *iser_conn = conn->dd_data; - iscsi_conn_teardown(cls_conn); if (iser_conn->ib_conn) iser_conn->ib_conn->iser_conn = NULL; - kfree(iser_conn); + iscsi_conn_teardown(cls_conn); } static int @@ -619,8 +611,6 @@ static struct iscsi_transport iscsi_iser_transport = { .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_NETDEV_NAME | ISCSI_HOST_INITIATOR_NAME, - .conndata_size = sizeof(struct iscsi_conn), - .sessiondata_size = sizeof(struct iscsi_session), /* session management */ .create_session = iscsi_iser_session_create, .destroy_session = iscsi_iser_session_destroy, -- cgit v1.2.3 From b40977d95fb3a1898ace6a7d97e4ed1a33a440a4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:03 -0500 Subject: [SCSI] iser: fix handling of scsi cmnds during recovery. After the stop_conn callback has returned the LLD should not touch the scsi cmds. iscsi_tcp and libiscsi use the conn->recv_lock and suspend_rx field to halt recv path processing, but iser does not have any protection. This patch modifies iser so that userspace can just call the ep_disconnect callback, which will halt all recv IO, before calling the stop_conn callback so we do not have to worry about the conn->recv_lock and suspend rx field. iser just needs to stop the send side from accessing the ib conn. Fixup to handle when the ep poll fails and ep disconnect is called from Erez. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 42 +++++++++++++++++++++++++++++--- drivers/infiniband/ulp/iser/iscsi_iser.h | 5 ++++ drivers/infiniband/ulp/iser/iser_verbs.c | 14 ++++++++++- 3 files changed, 57 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9b34946eb00d..8a1bfb7277c8 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -305,10 +305,18 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = iser_conn->ib_conn; - if (iser_conn->ib_conn) - iser_conn->ib_conn->iser_conn = NULL; iscsi_conn_teardown(cls_conn); + /* + * Userspace will normally call the stop callback and + * already have freed the ib_conn, but if it goofed up then + * we free it here. + */ + if (ib_conn) { + ib_conn->iser_conn = NULL; + iser_conn_put(ib_conn); + } } static int @@ -340,12 +348,29 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, iser_conn = conn->dd_data; ib_conn->iser_conn = iser_conn; iser_conn->ib_conn = ib_conn; + iser_conn_get(ib_conn); conn->recv_lock = &iser_conn->lock; return 0; } +static void +iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = iser_conn->ib_conn; + + iscsi_conn_stop(cls_conn, flag); + /* + * There is no unbind event so the stop callback + * must release the ref from the bind. 
+ */ + iser_conn_put(ib_conn); + iser_conn->ib_conn = NULL; +} + static int iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) { @@ -564,6 +589,17 @@ iscsi_iser_ep_disconnect(__u64 ep_handle) if (!ib_conn) return; + if (ib_conn->iser_conn) + /* + * Must suspend xmit path if the ep is bound to the + * iscsi_conn, so we know we are not accessing the ib_conn + * when we free it. + * + * This may not be bound if the ep poll failed. + */ + iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn); + + iser_err("ib conn %p state %d\n",ib_conn, ib_conn->state); iser_conn_terminate(ib_conn); } @@ -622,7 +658,7 @@ static struct iscsi_transport iscsi_iser_transport = { .get_conn_param = iscsi_conn_get_param, .get_session_param = iscsi_session_get_param, .start_conn = iscsi_iser_conn_start, - .stop_conn = iscsi_conn_stop, + .stop_conn = iscsi_iser_conn_stop, /* iscsi host params */ .get_host_param = iscsi_host_get_param, .set_host_param = iscsi_host_set_param, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 66a2f30ada01..bd5c1a554ea6 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -242,6 +242,7 @@ struct iser_device { struct iser_conn { struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ enum iser_ib_conn_state state; /* rdma connection state */ + atomic_t refcount; spinlock_t lock; /* used for state changes */ struct iser_device *device; /* device context */ struct rdma_cm_id *cma_id; /* CMA ID */ @@ -314,6 +315,10 @@ void iscsi_iser_recv(struct iscsi_conn *conn, int iser_conn_init(struct iser_conn **ib_conn); +void iser_conn_get(struct iser_conn *ib_conn); + +void iser_conn_put(struct iser_conn *ib_conn); + void iser_conn_terminate(struct iser_conn *ib_conn); void iser_rcv_completion(struct iser_desc *desc, diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index d19cfe605ebb..5daed2bd710e 100644 --- 
a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -328,6 +328,17 @@ static void iser_conn_release(struct iser_conn *ib_conn) kfree(ib_conn); } +void iser_conn_get(struct iser_conn *ib_conn) +{ + atomic_inc(&ib_conn->refcount); +} + +void iser_conn_put(struct iser_conn *ib_conn) +{ + if (atomic_dec_and_test(&ib_conn->refcount)) + iser_conn_release(ib_conn); +} + /** * triggers start of the disconnect procedures and wait for them to be done */ @@ -349,7 +360,7 @@ void iser_conn_terminate(struct iser_conn *ib_conn) wait_event_interruptible(ib_conn->wait, ib_conn->state == ISER_CONN_DOWN); - iser_conn_release(ib_conn); + iser_conn_put(ib_conn); } static void iser_connect_error(struct rdma_cm_id *cma_id) @@ -496,6 +507,7 @@ int iser_conn_init(struct iser_conn **ibconn) init_waitqueue_head(&ib_conn->wait); atomic_set(&ib_conn->post_recv_buf_count, 0); atomic_set(&ib_conn->post_send_buf_count, 0); + atomic_set(&ib_conn->refcount, 1); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); -- cgit v1.2.3 From 0af967f5d4f2dd1e00618d34ac988037d37a6c3b Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:04 -0500 Subject: [SCSI] libiscsi, iscsi_tcp, iser: add session cmds array accessor Currently to get a ctask from the session cmd array, you have to know to use the itt modifier. To make this easier on LLDs and so in the future we can easily kill the session array and use the host shared map instead, this patch adds a nice wrapper to strip the itt into a session->cmds index and return a ctask. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 8 +------- drivers/infiniband/ulp/iser/iser_initiator.c | 23 ++++++++++------------- 2 files changed, 11 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 8a1bfb7277c8..7b1468869066 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -98,7 +98,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, char *rx_data, int rx_data_len) { int rc = 0; - uint32_t ret_itt; int datalen; int ahslen; @@ -114,12 +113,7 @@ iscsi_iser_recv(struct iscsi_conn *conn, /* read AHS */ ahslen = hdr->hlength * 4; - /* verify itt (itt encoding: age+cid+itt) */ - rc = iscsi_verify_itt(conn, hdr, &ret_itt); - - if (!rc) - rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); - + rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); if (rc && rc != ISCSI_ERR_NO_SCSI_CMD) goto error; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 08dc81c46f41..b82a5f2d4d37 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -537,13 +537,11 @@ void iser_rcv_completion(struct iser_desc *rx_desc, { struct iser_dto *dto = &rx_desc->dto; struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; - struct iscsi_session *session = conn->iscsi_conn->session; struct iscsi_cmd_task *ctask; struct iscsi_iser_cmd_task *iser_ctask; struct iscsi_hdr *hdr; char *rx_data = NULL; int rx_data_len = 0; - unsigned int itt; unsigned char opcode; hdr = &rx_desc->iscsi_header; @@ -559,19 +557,18 @@ void iser_rcv_completion(struct iser_desc *rx_desc, opcode = hdr->opcode & ISCSI_OPCODE_MASK; if (opcode == ISCSI_OP_SCSI_CMD_RSP) { - itt = get_itt(hdr->itt); /* mask out cid and age bits */ - if (!(itt < 
session->cmds_max)) + ctask = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt); + if (!ctask) iser_err("itt can't be matched to task!!! " - "conn %p opcode %d cmds_max %d itt %d\n", - conn->iscsi_conn,opcode,session->cmds_max,itt); - /* use the mapping given with the cmds array indexed by itt */ - ctask = (struct iscsi_cmd_task *)session->cmds[itt]; - iser_ctask = ctask->dd_data; - iser_dbg("itt %d ctask %p\n",itt,ctask); - iser_ctask->status = ISER_TASK_STATUS_COMPLETED; - iser_ctask_rdma_finalize(iser_ctask); + "conn %p opcode %d itt %d\n", + conn->iscsi_conn, opcode, hdr->itt); + else { + iser_ctask = ctask->dd_data; + iser_dbg("itt %d ctask %p\n",hdr->itt, ctask); + iser_ctask->status = ISER_TASK_STATUS_COMPLETED; + iser_ctask_rdma_finalize(iser_ctask); + } } - iser_dto_buffs_release(dto); iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); -- cgit v1.2.3 From 2747fdb25726caa1a89229f43d99ca50af72576a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:08 -0500 Subject: [SCSI] iser: convert ib_iser to support merged tasks Convert ib_iser to support merged tasks. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 82 ++++++++++++++-------------- drivers/infiniband/ulp/iser/iscsi_iser.h | 14 ++--- drivers/infiniband/ulp/iser/iser_initiator.c | 42 +++++++------- 3 files changed, 68 insertions(+), 70 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 7b1468869066..baecca1ed42a 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -124,15 +124,23 @@ error: /** - * iscsi_iser_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands + * iscsi_iser_task_init - Initialize ctask + * @ctask: iscsi ctask * - **/ + * Initialize the ctask for the scsi command or mgmt command. 
+ */ static int -iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) +iscsi_iser_task_init(struct iscsi_cmd_task *ctask) { - struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; + struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + /* mgmt ctask */ + if (!ctask->sc) { + iser_ctask->desc.data = ctask->data; + return 0; + } + iser_ctask->command_sent = 0; iser_ctask->iser_conn = iser_conn; iser_ctask_rdma_init(iser_ctask); @@ -140,9 +148,9 @@ iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) } /** - * iscsi_mtask_xmit - xmit management(immediate) task + * iscsi_iser_mtask_xmit - xmit management(immediate) ctask * @conn: iscsi connection - * @mtask: task management task + * @ctask: ctask management ctask * * Notes: * The function can return -EAGAIN in which case caller must @@ -151,20 +159,19 @@ iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) * **/ static int -iscsi_iser_mtask_xmit(struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask) +iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { int error = 0; - debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt); + debug_scsi("ctask deq [cid %d itt 0x%x]\n", conn->id, ctask->itt); - error = iser_send_control(conn, mtask); + error = iser_send_control(conn, ctask); - /* since iser xmits control with zero copy, mtasks can not be recycled + /* since iser xmits control with zero copy, ctasks can not be recycled * right after sending them. 
* The recycling scheme is based on whether a response is expected - * - if yes, the mtask is recycled at iscsi_complete_pdu - * - if no, the mtask is recycled at iser_snd_completion + * - if yes, the ctask is recycled at iscsi_complete_pdu + * - if no, the ctask is recycled at iser_snd_completion */ if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); @@ -173,7 +180,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, } static int -iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn, +iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { struct iscsi_data hdr; @@ -190,24 +197,27 @@ iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn, error = iser_send_data_out(conn, ctask, &hdr); if (error) { ctask->unsol_datasn--; - goto iscsi_iser_ctask_xmit_unsol_data_exit; + goto iscsi_iser_task_xmit_unsol_data_exit; } ctask->unsol_count -= ctask->data_count; debug_scsi("Need to send %d more as data-out PDUs\n", ctask->unsol_count); } -iscsi_iser_ctask_xmit_unsol_data_exit: +iscsi_iser_task_xmit_unsol_data_exit: return error; } static int -iscsi_iser_ctask_xmit(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) +iscsi_iser_task_xmit(struct iscsi_cmd_task *ctask) { + struct iscsi_conn *conn = ctask->conn; struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; int error = 0; + if (!ctask->sc) + return iscsi_iser_mtask_xmit(conn, ctask); + if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) { BUG_ON(scsi_bufflen(ctask->sc) == 0); @@ -223,25 +233,29 @@ iscsi_iser_ctask_xmit(struct iscsi_conn *conn, if (!iser_ctask->command_sent) { error = iser_send_command(conn, ctask); if (error) - goto iscsi_iser_ctask_xmit_exit; + goto iscsi_iser_task_xmit_exit; iser_ctask->command_sent = 1; } /* Send unsolicited data-out PDU(s) if necessary */ if (ctask->unsol_count) - error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); + error = iscsi_iser_task_xmit_unsol_data(conn, ctask); - iscsi_iser_ctask_xmit_exit: + 
iscsi_iser_task_xmit_exit: if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; } static void -iscsi_iser_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +iscsi_iser_cleanup_task(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + /* mgmt tasks do not need special cleanup */ + if (!ctask->sc) + return; + if (iser_ctask->status == ISER_TASK_STATUS_STARTED) { iser_ctask->status = ISER_TASK_STATUS_COMPLETED; iser_ctask_rdma_finalize(iser_ctask); @@ -394,10 +408,8 @@ iscsi_iser_session_create(struct Scsi_Host *shost, struct iscsi_cls_session *cls_session; struct iscsi_session *session; int i; - struct iscsi_cmd_task *ctask; - struct iscsi_mgmt_task *mtask; + struct iscsi_cmd_task *ctask; struct iscsi_iser_cmd_task *iser_ctask; - struct iser_desc *desc; if (shost) { printk(KERN_ERR "iscsi_tcp: invalid shost %d.\n", @@ -425,28 +437,19 @@ iscsi_iser_session_create(struct Scsi_Host *shost, cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, ISCSI_DEF_XMIT_CMDS_MAX, sizeof(struct iscsi_iser_cmd_task), - sizeof(struct iser_desc), initial_cmdsn); if (!cls_session) goto remove_host; session = cls_session->dd_data; - shost->can_queue = session->cmds_max; + shost->can_queue = session->scsi_cmds_max; /* libiscsi setup itts, data and pool so just set desc fields */ for (i = 0; i < session->cmds_max; i++) { - ctask = session->cmds[i]; + ctask = session->cmds[i]; iser_ctask = ctask->dd_data; ctask->hdr = (struct iscsi_cmd *)&iser_ctask->desc.iscsi_header; ctask->hdr_max = sizeof(iser_ctask->desc.iscsi_header); } - - for (i = 0; i < session->mgmtpool_max; i++) { - mtask = session->mgmt_cmds[i]; - desc = mtask->dd_data; - mtask->hdr = &desc->iscsi_header; - desc->data = mtask->data; - } - return cls_session; remove_host: @@ -659,10 +662,9 @@ static struct iscsi_transport iscsi_iser_transport = { /* IO */ .send_pdu = iscsi_conn_send_pdu, .get_stats 
= iscsi_iser_conn_get_stats, - .init_cmd_task = iscsi_iser_cmd_init, - .xmit_cmd_task = iscsi_iser_ctask_xmit, - .xmit_mgmt_task = iscsi_iser_mtask_xmit, - .cleanup_cmd_task = iscsi_iser_cleanup_ctask, + .init_task = iscsi_iser_task_init, + .xmit_task = iscsi_iser_task_xmit, + .cleanup_task = iscsi_iser_cleanup_task, /* recovery */ .session_recovery_timedout = iscsi_session_recovery_timedout, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index bd5c1a554ea6..96a600f127c8 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -298,15 +298,15 @@ extern int iser_debug_level; /* allocate connection resources needed for rdma functionality */ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); -int iser_send_control(struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask); +int iser_send_control(struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask); -int iser_send_command(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask); +int iser_send_command(struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask); -int iser_send_data_out(struct iscsi_conn *conn, +int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, - struct iscsi_data *hdr); + struct iscsi_data *hdr); void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, @@ -326,7 +326,7 @@ void iser_rcv_completion(struct iser_desc *desc, void iser_snd_completion(struct iser_desc *desc); -void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *ctask); +void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *ctask); void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *ctask); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index b82a5f2d4d37..4ea78fbeee95 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -300,13 +300,13 @@ int 
iser_conn_set_full_featured_mode(struct iscsi_conn *conn) } static int -iser_check_xmit(struct iscsi_conn *conn, void *task) +iser_check_xmit(struct iscsi_conn *conn, void *ctask) { struct iscsi_iser_conn *iser_conn = conn->dd_data; if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == ISER_QP_MAX_REQ_DTOS) { - iser_dbg("%ld can't xmit task %p\n",jiffies,task); + iser_dbg("%ld can't xmit ctask %p\n",jiffies,ctask); return -ENOBUFS; } return 0; @@ -316,7 +316,7 @@ iser_check_xmit(struct iscsi_conn *conn, void *task) /** * iser_send_command - send command PDU */ -int iser_send_command(struct iscsi_conn *conn, +int iser_send_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { struct iscsi_iser_conn *iser_conn = conn->dd_data; @@ -395,7 +395,7 @@ send_command_error: /** * iser_send_data_out - send data out PDU */ -int iser_send_data_out(struct iscsi_conn *conn, +int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, struct iscsi_data *hdr) { @@ -470,10 +470,11 @@ send_data_out_error: } int iser_send_control(struct iscsi_conn *conn, - struct iscsi_mgmt_task *mtask) + struct iscsi_cmd_task *ctask) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iser_desc *mdesc = mtask->dd_data; + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iser_desc *mdesc = &iser_ctask->desc; struct iser_dto *send_dto = NULL; unsigned long data_seg_len; int err = 0; @@ -485,7 +486,7 @@ int iser_send_control(struct iscsi_conn *conn, return -EPERM; } - if (iser_check_xmit(conn,mtask)) + if (iser_check_xmit(conn, ctask)) return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ @@ -498,14 +499,14 @@ int iser_send_control(struct iscsi_conn *conn, iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE); - data_seg_len = ntoh24(mtask->hdr->dlength); + data_seg_len = ntoh24(ctask->hdr->dlength); if (data_seg_len > 0) { regd_buf = &mdesc->data_regd_buf; memset(regd_buf, 0, sizeof(struct iser_regd_buf)); 
regd_buf->device = device; - regd_buf->virt_addr = mtask->data; - regd_buf->data_size = mtask->data_count; + regd_buf->virt_addr = ctask->data; + regd_buf->data_size = ctask->data_count; iser_reg_single(device, regd_buf, DMA_TO_DEVICE); iser_dto_add_regd_buff(send_dto, regd_buf, @@ -535,7 +536,7 @@ send_control_error: void iser_rcv_completion(struct iser_desc *rx_desc, unsigned long dto_xfer_len) { - struct iser_dto *dto = &rx_desc->dto; + struct iser_dto *dto = &rx_desc->dto; struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; struct iscsi_cmd_task *ctask; struct iscsi_iser_cmd_task *iser_ctask; @@ -559,7 +560,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc, if (opcode == ISCSI_OP_SCSI_CMD_RSP) { ctask = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt); if (!ctask) - iser_err("itt can't be matched to task!!! " + iser_err("itt can't be matched to ctask!!! " "conn %p opcode %d itt %d\n", conn->iscsi_conn, opcode, hdr->itt); else { @@ -577,7 +578,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc, kmem_cache_free(ig.desc_cache, rx_desc); /* decrementing conn->post_recv_buf_count only --after-- freeing the * - * task eliminates the need to worry on tasks which are completed in * + * ctask eliminates the need to worry on ctasks which are completed in * * parallel to the execution of iser_conn_term. 
So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ atomic_dec(&conn->ib_conn->post_recv_buf_count); @@ -589,7 +590,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) struct iser_conn *ib_conn = dto->ib_conn; struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; - struct iscsi_mgmt_task *mtask; + struct iscsi_cmd_task *ctask; int resume_tx = 0; iser_dbg("Initiator, Data sent dto=0x%p\n", dto); @@ -612,15 +613,10 @@ void iser_snd_completion(struct iser_desc *tx_desc) if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ - mtask = (void *) ((long)(void *)tx_desc - - sizeof(struct iscsi_mgmt_task)); - if (mtask->hdr->itt == RESERVED_ITT) { - struct iscsi_session *session = conn->session; - - spin_lock(&conn->session->lock); - iscsi_free_mgmt_task(conn, mtask); - spin_unlock(&session->lock); - } + ctask = (void *) ((long)(void *)tx_desc - + sizeof(struct iscsi_cmd_task)); + if (ctask->hdr->itt == RESERVED_ITT) + iscsi_put_ctask(ctask); } } -- cgit v1.2.3 From 2261ec3d686e35c1a6088ab7f00a1d02b528b994 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:11 -0500 Subject: [SCSI] iser: handle iscsi_cmd_task rename This handles the iscsi_cmd_task rename and renames the iser cmd task to iser task. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 114 ++++++++-------- drivers/infiniband/ulp/iser/iscsi_iser.h | 24 ++-- drivers/infiniband/ulp/iser/iser_initiator.c | 190 +++++++++++++-------------- drivers/infiniband/ulp/iser/iser_memory.c | 77 +++++------ 4 files changed, 203 insertions(+), 202 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index baecca1ed42a..86d9c42f0d33 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -124,33 +124,33 @@ error: /** - * iscsi_iser_task_init - Initialize ctask - * @ctask: iscsi ctask + * iscsi_iser_task_init - Initialize task + * @task: iscsi task * - * Initialize the ctask for the scsi command or mgmt command. + * Initialize the task for the scsi command or mgmt command. */ static int -iscsi_iser_task_init(struct iscsi_cmd_task *ctask) +iscsi_iser_task_init(struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_conn *iser_conn = task->conn->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; - /* mgmt ctask */ - if (!ctask->sc) { - iser_ctask->desc.data = ctask->data; + /* mgmt task */ + if (!task->sc) { + iser_task->desc.data = task->data; return 0; } - iser_ctask->command_sent = 0; - iser_ctask->iser_conn = iser_conn; - iser_ctask_rdma_init(iser_ctask); + iser_task->command_sent = 0; + iser_task->iser_conn = iser_conn; + iser_task_rdma_init(iser_task); return 0; } /** - * iscsi_iser_mtask_xmit - xmit management(immediate) ctask + * iscsi_iser_mtask_xmit - xmit management(immediate) task * @conn: iscsi connection - * @ctask: ctask management ctask + * @task: task management task * * Notes: * The function can return -EAGAIN in which case caller must @@ -159,19 +159,19 @@ iscsi_iser_task_init(struct 
iscsi_cmd_task *ctask) * **/ static int -iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) { int error = 0; - debug_scsi("ctask deq [cid %d itt 0x%x]\n", conn->id, ctask->itt); + debug_scsi("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); - error = iser_send_control(conn, ctask); + error = iser_send_control(conn, task); - /* since iser xmits control with zero copy, ctasks can not be recycled + /* since iser xmits control with zero copy, tasks can not be recycled * right after sending them. * The recycling scheme is based on whether a response is expected - * - if yes, the ctask is recycled at iscsi_complete_pdu - * - if no, the ctask is recycled at iser_snd_completion + * - if yes, the task is recycled at iscsi_complete_pdu + * - if no, the task is recycled at iser_snd_completion */ if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); @@ -181,27 +181,27 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) static int iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) + struct iscsi_task *task) { struct iscsi_data hdr; int error = 0; /* Send data-out PDUs while there's still unsolicited data to send */ - while (ctask->unsol_count > 0) { - iscsi_prep_unsolicit_data_pdu(ctask, &hdr); + while (task->unsol_count > 0) { + iscsi_prep_unsolicit_data_pdu(task, &hdr); debug_scsi("Sending data-out: itt 0x%x, data count %d\n", - hdr.itt, ctask->data_count); + hdr.itt, task->data_count); /* the buffer description has been passed with the command */ /* Send the command */ - error = iser_send_data_out(conn, ctask, &hdr); + error = iser_send_data_out(conn, task, &hdr); if (error) { - ctask->unsol_datasn--; + task->unsol_datasn--; goto iscsi_iser_task_xmit_unsol_data_exit; } - ctask->unsol_count -= ctask->data_count; + task->unsol_count -= task->data_count; debug_scsi("Need to send %d 
more as data-out PDUs\n", - ctask->unsol_count); + task->unsol_count); } iscsi_iser_task_xmit_unsol_data_exit: @@ -209,37 +209,37 @@ iscsi_iser_task_xmit_unsol_data_exit: } static int -iscsi_iser_task_xmit(struct iscsi_cmd_task *ctask) +iscsi_iser_task_xmit(struct iscsi_task *task) { - struct iscsi_conn *conn = ctask->conn; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_conn *conn = task->conn; + struct iscsi_iser_task *iser_task = task->dd_data; int error = 0; - if (!ctask->sc) - return iscsi_iser_mtask_xmit(conn, ctask); + if (!task->sc) + return iscsi_iser_mtask_xmit(conn, task); - if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) { - BUG_ON(scsi_bufflen(ctask->sc) == 0); + if (task->sc->sc_data_direction == DMA_TO_DEVICE) { + BUG_ON(scsi_bufflen(task->sc) == 0); debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n", - ctask->itt, scsi_bufflen(ctask->sc), - ctask->imm_count, ctask->unsol_count); + task->itt, scsi_bufflen(task->sc), + task->imm_count, task->unsol_count); } - debug_scsi("ctask deq [cid %d itt 0x%x]\n", - conn->id, ctask->itt); + debug_scsi("task deq [cid %d itt 0x%x]\n", + conn->id, task->itt); /* Send the cmd PDU */ - if (!iser_ctask->command_sent) { - error = iser_send_command(conn, ctask); + if (!iser_task->command_sent) { + error = iser_send_command(conn, task); if (error) goto iscsi_iser_task_xmit_exit; - iser_ctask->command_sent = 1; + iser_task->command_sent = 1; } /* Send unsolicited data-out PDU(s) if necessary */ - if (ctask->unsol_count) - error = iscsi_iser_task_xmit_unsol_data(conn, ctask); + if (task->unsol_count) + error = iscsi_iser_task_xmit_unsol_data(conn, task); iscsi_iser_task_xmit_exit: if (error && error != -ENOBUFS) @@ -248,17 +248,17 @@ iscsi_iser_task_xmit(struct iscsi_cmd_task *ctask) } static void -iscsi_iser_cleanup_task(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +iscsi_iser_cleanup_task(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iscsi_iser_cmd_task 
*iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; /* mgmt tasks do not need special cleanup */ - if (!ctask->sc) + if (!task->sc) return; - if (iser_ctask->status == ISER_TASK_STATUS_STARTED) { - iser_ctask->status = ISER_TASK_STATUS_COMPLETED; - iser_ctask_rdma_finalize(iser_ctask); + if (iser_task->status == ISER_TASK_STATUS_STARTED) { + iser_task->status = ISER_TASK_STATUS_COMPLETED; + iser_task_rdma_finalize(iser_task); } } @@ -408,8 +408,8 @@ iscsi_iser_session_create(struct Scsi_Host *shost, struct iscsi_cls_session *cls_session; struct iscsi_session *session; int i; - struct iscsi_cmd_task *ctask; - struct iscsi_iser_cmd_task *iser_ctask; + struct iscsi_task *task; + struct iscsi_iser_task *iser_task; if (shost) { printk(KERN_ERR "iscsi_tcp: invalid shost %d.\n", @@ -436,7 +436,7 @@ iscsi_iser_session_create(struct Scsi_Host *shost, */ cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, ISCSI_DEF_XMIT_CMDS_MAX, - sizeof(struct iscsi_iser_cmd_task), + sizeof(struct iscsi_iser_task), initial_cmdsn); if (!cls_session) goto remove_host; @@ -445,10 +445,10 @@ iscsi_iser_session_create(struct Scsi_Host *shost, shost->can_queue = session->scsi_cmds_max; /* libiscsi setup itts, data and pool so just set desc fields */ for (i = 0; i < session->cmds_max; i++) { - ctask = session->cmds[i]; - iser_ctask = ctask->dd_data; - ctask->hdr = (struct iscsi_cmd *)&iser_ctask->desc.iscsi_header; - ctask->hdr_max = sizeof(iser_ctask->desc.iscsi_header); + task = session->cmds[i]; + iser_task = task->dd_data; + task->hdr = (struct iscsi_cmd *)&iser_task->desc.iscsi_header; + task->hdr_max = sizeof(iser_task->desc.iscsi_header); } return cls_session; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 96a600f127c8..05431f270fe8 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -173,7 +173,7 @@ struct iser_data_buf { /* fwd 
declarations */ struct iser_device; struct iscsi_iser_conn; -struct iscsi_iser_cmd_task; +struct iscsi_iser_task; struct iser_mem_reg { u32 lkey; @@ -197,7 +197,7 @@ struct iser_regd_buf { #define MAX_REGD_BUF_VECTOR_LEN 2 struct iser_dto { - struct iscsi_iser_cmd_task *ctask; + struct iscsi_iser_task *task; struct iser_conn *ib_conn; int notify_enable; @@ -265,7 +265,7 @@ struct iscsi_iser_conn { rwlock_t lock; }; -struct iscsi_iser_cmd_task { +struct iscsi_iser_task { struct iser_desc desc; struct iscsi_iser_conn *iser_conn; enum iser_task_status status; @@ -299,13 +299,13 @@ extern int iser_debug_level; int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); int iser_send_control(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask); + struct iscsi_task *task); int iser_send_command(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask); + struct iscsi_task *task); int iser_send_data_out(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask, + struct iscsi_task *task, struct iscsi_data *hdr); void iscsi_iser_recv(struct iscsi_conn *conn, @@ -326,9 +326,9 @@ void iser_rcv_completion(struct iser_desc *desc, void iser_snd_completion(struct iser_desc *desc); -void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *ctask); +void iser_task_rdma_init(struct iscsi_iser_task *task); -void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *ctask); +void iser_task_rdma_finalize(struct iscsi_iser_task *task); void iser_dto_buffs_release(struct iser_dto *dto); @@ -338,10 +338,10 @@ void iser_reg_single(struct iser_device *device, struct iser_regd_buf *regd_buf, enum dma_data_direction direction); -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *ctask, +int iser_reg_rdma_mem(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, @@ -361,10 
+361,10 @@ int iser_post_send(struct iser_desc *tx_desc); int iser_conn_state_comp(struct iser_conn *ib_conn, enum iser_ib_conn_state comp); -int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, +int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct iser_data_buf *data, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir); -void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask); +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 4ea78fbeee95..35af60a23c61 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -66,46 +66,46 @@ static void iser_dto_add_regd_buff(struct iser_dto *dto, /* Register user buffer memory and initialize passive rdma * dto descriptor. Total data size is stored in - * iser_ctask->data[ISER_DIR_IN].data_len + * iser_task->data[ISER_DIR_IN].data_len */ -static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask, +static int iser_prepare_read_cmd(struct iscsi_task *task, unsigned int edtl) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_regd_buf *regd_buf; int err; - struct iser_hdr *hdr = &iser_ctask->desc.iser_header; - struct iser_data_buf *buf_in = &iser_ctask->data[ISER_DIR_IN]; + struct iser_hdr *hdr = &iser_task->desc.iser_header; + struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; - err = iser_dma_map_task_data(iser_ctask, + err = iser_dma_map_task_data(iser_task, buf_in, ISER_DIR_IN, DMA_FROM_DEVICE); if (err) return err; - if (edtl > iser_ctask->data[ISER_DIR_IN].data_len) { + if (edtl > iser_task->data[ISER_DIR_IN].data_len) { iser_err("Total data length: %ld, less than EDTL: " "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", - iser_ctask->data[ISER_DIR_IN].data_len, edtl, - ctask->itt, 
iser_ctask->iser_conn); + iser_task->data[ISER_DIR_IN].data_len, edtl, + task->itt, iser_task->iser_conn); return -EINVAL; } - err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_IN); + err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); return err; } - regd_buf = &iser_ctask->rdma_regd[ISER_DIR_IN]; + regd_buf = &iser_task->rdma_regd[ISER_DIR_IN]; hdr->flags |= ISER_RSV; hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey); hdr->read_va = cpu_to_be64(regd_buf->reg.va); iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n", - ctask->itt, regd_buf->reg.rkey, + task->itt, regd_buf->reg.rkey, (unsigned long long)regd_buf->reg.va); return 0; @@ -113,43 +113,43 @@ static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask, /* Register user buffer memory and initialize passive rdma * dto descriptor. Total data size is stored in - * ctask->data[ISER_DIR_OUT].data_len + * task->data[ISER_DIR_OUT].data_len */ static int -iser_prepare_write_cmd(struct iscsi_cmd_task *ctask, +iser_prepare_write_cmd(struct iscsi_task *task, unsigned int imm_sz, unsigned int unsol_sz, unsigned int edtl) { - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_regd_buf *regd_buf; int err; - struct iser_dto *send_dto = &iser_ctask->desc.dto; - struct iser_hdr *hdr = &iser_ctask->desc.iser_header; - struct iser_data_buf *buf_out = &iser_ctask->data[ISER_DIR_OUT]; + struct iser_dto *send_dto = &iser_task->desc.dto; + struct iser_hdr *hdr = &iser_task->desc.iser_header; + struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; - err = iser_dma_map_task_data(iser_ctask, + err = iser_dma_map_task_data(iser_task, buf_out, ISER_DIR_OUT, DMA_TO_DEVICE); if (err) return err; - if (edtl > iser_ctask->data[ISER_DIR_OUT].data_len) { + if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { iser_err("Total data length: %ld, less than EDTL: %d, " "in WRITE cmd BHS itt: %d, conn: 0x%p\n", - 
iser_ctask->data[ISER_DIR_OUT].data_len, - edtl, ctask->itt, ctask->conn); + iser_task->data[ISER_DIR_OUT].data_len, + edtl, task->itt, task->conn); return -EINVAL; } - err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_OUT); + err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); return err; } - regd_buf = &iser_ctask->rdma_regd[ISER_DIR_OUT]; + regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; if (unsol_sz < edtl) { hdr->flags |= ISER_WSV; @@ -158,13 +158,13 @@ iser_prepare_write_cmd(struct iscsi_cmd_task *ctask, iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " "VA:%#llX + unsol:%d\n", - ctask->itt, regd_buf->reg.rkey, + task->itt, regd_buf->reg.rkey, (unsigned long long)regd_buf->reg.va, unsol_sz); } if (imm_sz > 0) { iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", - ctask->itt, imm_sz); + task->itt, imm_sz); iser_dto_add_regd_buff(send_dto, regd_buf, 0, @@ -300,13 +300,13 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) } static int -iser_check_xmit(struct iscsi_conn *conn, void *ctask) +iser_check_xmit(struct iscsi_conn *conn, void *task) { struct iscsi_iser_conn *iser_conn = conn->dd_data; if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == ISER_QP_MAX_REQ_DTOS) { - iser_dbg("%ld can't xmit ctask %p\n",jiffies,ctask); + iser_dbg("%ld can't xmit task %p\n",jiffies,task); return -ENOBUFS; } return 0; @@ -317,37 +317,37 @@ iser_check_xmit(struct iscsi_conn *conn, void *ctask) * iser_send_command - send command PDU */ int iser_send_command(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) + struct iscsi_task *task) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_dto *send_dto = NULL; unsigned long edtl; int err = 0; struct iser_data_buf *data_buf; - struct iscsi_cmd *hdr = ctask->hdr; - struct scsi_cmnd *sc = ctask->sc; + struct iscsi_cmd *hdr = 
task->hdr; + struct scsi_cmnd *sc = task->sc; if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); return -EPERM; } - if (iser_check_xmit(conn, ctask)) + if (iser_check_xmit(conn, task)) return -ENOBUFS; edtl = ntohl(hdr->data_length); /* build the tx desc regd header and add it to the tx desc dto */ - iser_ctask->desc.type = ISCSI_TX_SCSI_COMMAND; - send_dto = &iser_ctask->desc.dto; - send_dto->ctask = iser_ctask; - iser_create_send_desc(iser_conn, &iser_ctask->desc); + iser_task->desc.type = ISCSI_TX_SCSI_COMMAND; + send_dto = &iser_task->desc.dto; + send_dto->task = iser_task; + iser_create_send_desc(iser_conn, &iser_task->desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) - data_buf = &iser_ctask->data[ISER_DIR_IN]; + data_buf = &iser_task->data[ISER_DIR_IN]; else - data_buf = &iser_ctask->data[ISER_DIR_OUT]; + data_buf = &iser_task->data[ISER_DIR_OUT]; if (scsi_sg_count(sc)) { /* using a scatter list */ data_buf->buf = scsi_sglist(sc); @@ -357,15 +357,15 @@ int iser_send_command(struct iscsi_conn *conn, data_buf->data_len = scsi_bufflen(sc); if (hdr->flags & ISCSI_FLAG_CMD_READ) { - err = iser_prepare_read_cmd(ctask, edtl); + err = iser_prepare_read_cmd(task, edtl); if (err) goto send_command_error; } if (hdr->flags & ISCSI_FLAG_CMD_WRITE) { - err = iser_prepare_write_cmd(ctask, - ctask->imm_count, - ctask->imm_count + - ctask->unsol_count, + err = iser_prepare_write_cmd(task, + task->imm_count, + task->imm_count + + task->unsol_count, edtl); if (err) goto send_command_error; @@ -380,15 +380,15 @@ int iser_send_command(struct iscsi_conn *conn, goto send_command_error; } - iser_ctask->status = ISER_TASK_STATUS_STARTED; + iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(&iser_ctask->desc); + err = iser_post_send(&iser_task->desc); if (!err) return 0; send_command_error: iser_dto_buffs_release(send_dto); - iser_err("conn %p failed ctask->itt %d err %d\n",conn, 
ctask->itt, err); + iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err); return err; } @@ -396,11 +396,11 @@ send_command_error: * iser_send_data_out - send data out PDU */ int iser_send_data_out(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask, + struct iscsi_task *task, struct iscsi_data *hdr) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iscsi_iser_task *iser_task = task->dd_data; struct iser_desc *tx_desc = NULL; struct iser_dto *send_dto = NULL; unsigned long buf_offset; @@ -413,7 +413,7 @@ int iser_send_data_out(struct iscsi_conn *conn, return -EPERM; } - if (iser_check_xmit(conn, ctask)) + if (iser_check_xmit(conn, task)) return -ENOBUFS; itt = (__force uint32_t)hdr->itt; @@ -434,7 +434,7 @@ int iser_send_data_out(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ send_dto = &tx_desc->dto; - send_dto->ctask = iser_ctask; + send_dto->task = iser_task; iser_create_send_desc(iser_conn, tx_desc); iser_reg_single(iser_conn->ib_conn->device, @@ -442,15 +442,15 @@ int iser_send_data_out(struct iscsi_conn *conn, /* all data was registered for RDMA, we can use the lkey */ iser_dto_add_regd_buff(send_dto, - &iser_ctask->rdma_regd[ISER_DIR_OUT], + &iser_task->rdma_regd[ISER_DIR_OUT], buf_offset, data_seg_len); - if (buf_offset + data_seg_len > iser_ctask->data[ISER_DIR_OUT].data_len) { + if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { iser_err("Offset:%ld & DSL:%ld in Data-Out " "inconsistent with total len:%ld, itt:%d\n", buf_offset, data_seg_len, - iser_ctask->data[ISER_DIR_OUT].data_len, itt); + iser_task->data[ISER_DIR_OUT].data_len, itt); err = -EINVAL; goto send_data_out_error; } @@ -470,11 +470,11 @@ send_data_out_error: } int iser_send_control(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) + struct iscsi_task *task) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct 
iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; - struct iser_desc *mdesc = &iser_ctask->desc; + struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_desc *mdesc = &iser_task->desc; struct iser_dto *send_dto = NULL; unsigned long data_seg_len; int err = 0; @@ -486,27 +486,27 @@ int iser_send_control(struct iscsi_conn *conn, return -EPERM; } - if (iser_check_xmit(conn, ctask)) + if (iser_check_xmit(conn, task)) return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; send_dto = &mdesc->dto; - send_dto->ctask = NULL; + send_dto->task = NULL; iser_create_send_desc(iser_conn, mdesc); device = iser_conn->ib_conn->device; iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE); - data_seg_len = ntoh24(ctask->hdr->dlength); + data_seg_len = ntoh24(task->hdr->dlength); if (data_seg_len > 0) { regd_buf = &mdesc->data_regd_buf; memset(regd_buf, 0, sizeof(struct iser_regd_buf)); regd_buf->device = device; - regd_buf->virt_addr = ctask->data; - regd_buf->data_size = ctask->data_count; + regd_buf->virt_addr = task->data; + regd_buf->data_size = task->data_count; iser_reg_single(device, regd_buf, DMA_TO_DEVICE); iser_dto_add_regd_buff(send_dto, regd_buf, @@ -538,8 +538,8 @@ void iser_rcv_completion(struct iser_desc *rx_desc, { struct iser_dto *dto = &rx_desc->dto; struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; - struct iscsi_cmd_task *ctask; - struct iscsi_iser_cmd_task *iser_ctask; + struct iscsi_task *task; + struct iscsi_iser_task *iser_task; struct iscsi_hdr *hdr; char *rx_data = NULL; int rx_data_len = 0; @@ -558,16 +558,16 @@ void iser_rcv_completion(struct iser_desc *rx_desc, opcode = hdr->opcode & ISCSI_OPCODE_MASK; if (opcode == ISCSI_OP_SCSI_CMD_RSP) { - ctask = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt); - if (!ctask) - iser_err("itt can't be matched to ctask!!! " + task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt); + if (!task) + iser_err("itt can't be matched to task!!! 
" "conn %p opcode %d itt %d\n", conn->iscsi_conn, opcode, hdr->itt); else { - iser_ctask = ctask->dd_data; - iser_dbg("itt %d ctask %p\n",hdr->itt, ctask); - iser_ctask->status = ISER_TASK_STATUS_COMPLETED; - iser_ctask_rdma_finalize(iser_ctask); + iser_task = task->dd_data; + iser_dbg("itt %d task %p\n",hdr->itt, task); + iser_task->status = ISER_TASK_STATUS_COMPLETED; + iser_task_rdma_finalize(iser_task); } } iser_dto_buffs_release(dto); @@ -578,7 +578,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc, kmem_cache_free(ig.desc_cache, rx_desc); /* decrementing conn->post_recv_buf_count only --after-- freeing the * - * ctask eliminates the need to worry on ctasks which are completed in * + * task eliminates the need to worry on tasks which are completed in * * parallel to the execution of iser_conn_term. So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ atomic_dec(&conn->ib_conn->post_recv_buf_count); @@ -590,7 +590,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) struct iser_conn *ib_conn = dto->ib_conn; struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; - struct iscsi_cmd_task *ctask; + struct iscsi_task *task; int resume_tx = 0; iser_dbg("Initiator, Data sent dto=0x%p\n", dto); @@ -613,31 +613,31 @@ void iser_snd_completion(struct iser_desc *tx_desc) if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ - ctask = (void *) ((long)(void *)tx_desc - - sizeof(struct iscsi_cmd_task)); - if (ctask->hdr->itt == RESERVED_ITT) - iscsi_put_ctask(ctask); + task = (void *) ((long)(void *)tx_desc - + sizeof(struct iscsi_task)); + if (task->hdr->itt == RESERVED_ITT) + iscsi_put_task(task); } } -void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask) +void iser_task_rdma_init(struct iscsi_iser_task *iser_task) { - iser_ctask->status = ISER_TASK_STATUS_INIT; + iser_task->status = ISER_TASK_STATUS_INIT; - 
iser_ctask->dir[ISER_DIR_IN] = 0; - iser_ctask->dir[ISER_DIR_OUT] = 0; + iser_task->dir[ISER_DIR_IN] = 0; + iser_task->dir[ISER_DIR_OUT] = 0; - iser_ctask->data[ISER_DIR_IN].data_len = 0; - iser_ctask->data[ISER_DIR_OUT].data_len = 0; + iser_task->data[ISER_DIR_IN].data_len = 0; + iser_task->data[ISER_DIR_OUT].data_len = 0; - memset(&iser_ctask->rdma_regd[ISER_DIR_IN], 0, + memset(&iser_task->rdma_regd[ISER_DIR_IN], 0, sizeof(struct iser_regd_buf)); - memset(&iser_ctask->rdma_regd[ISER_DIR_OUT], 0, + memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, sizeof(struct iser_regd_buf)); } -void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) +void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { int deferred; int is_rdma_aligned = 1; @@ -646,17 +646,17 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ - if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) { + if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN); + iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN); } - if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) { + if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT); + iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); } - if (iser_ctask->dir[ISER_DIR_IN]) { - regd = &iser_ctask->rdma_regd[ISER_DIR_IN]; + if (iser_task->dir[ISER_DIR_IN]) { + regd = &iser_task->rdma_regd[ISER_DIR_IN]; deferred = iser_regd_buff_release(regd); if (deferred) { iser_err("%d references remain for BUF-IN rdma reg\n", @@ -664,8 +664,8 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) } } - if (iser_ctask->dir[ISER_DIR_OUT]) { - regd = &iser_ctask->rdma_regd[ISER_DIR_OUT]; + if (iser_task->dir[ISER_DIR_OUT]) { + regd = 
&iser_task->rdma_regd[ISER_DIR_OUT]; deferred = iser_regd_buff_release(regd); if (deferred) { iser_err("%d references remain for BUF-OUT rdma reg\n", @@ -675,7 +675,7 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) /* if the data was unaligned, it was already unmapped and then copied */ if (is_rdma_aligned) - iser_dma_unmap_task_data(iser_ctask); + iser_dma_unmap_task_data(iser_task); } void iser_dto_buffs_release(struct iser_dto *dto) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index cac50c4dc159..48f2a601fc27 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -101,13 +101,13 @@ void iser_reg_single(struct iser_device *device, /** * iser_start_rdma_unaligned_sg */ -static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, +static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { int dma_nents; struct ib_device *dev; char *mem = NULL; - struct iser_data_buf *data = &iser_ctask->data[cmd_dir]; + struct iser_data_buf *data = &iser_task->data[cmd_dir]; unsigned long cmd_data_len = data->data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) @@ -140,37 +140,37 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, } } - sg_init_one(&iser_ctask->data_copy[cmd_dir].sg_single, mem, cmd_data_len); - iser_ctask->data_copy[cmd_dir].buf = - &iser_ctask->data_copy[cmd_dir].sg_single; - iser_ctask->data_copy[cmd_dir].size = 1; + sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len); + iser_task->data_copy[cmd_dir].buf = + &iser_task->data_copy[cmd_dir].sg_single; + iser_task->data_copy[cmd_dir].size = 1; - iser_ctask->data_copy[cmd_dir].copy_buf = mem; + iser_task->data_copy[cmd_dir].copy_buf = mem; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn->device->ib_device; dma_nents = 
ib_dma_map_sg(dev, - &iser_ctask->data_copy[cmd_dir].sg_single, + &iser_task->data_copy[cmd_dir].sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); BUG_ON(dma_nents == 0); - iser_ctask->data_copy[cmd_dir].dma_nents = dma_nents; + iser_task->data_copy[cmd_dir].dma_nents = dma_nents; return 0; } /** * iser_finalize_rdma_unaligned_sg */ -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { struct ib_device *dev; struct iser_data_buf *mem_copy; unsigned long cmd_data_len; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; - mem_copy = &iser_ctask->data_copy[cmd_dir]; + dev = iser_task->iser_conn->ib_conn->device->ib_device; + mem_copy = &iser_task->data_copy[cmd_dir]; ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? @@ -186,8 +186,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, /* copy back read RDMA to unaligned sg */ mem = mem_copy->copy_buf; - sgl = (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf; - sg_size = iser_ctask->data[ISER_DIR_IN].size; + sgl = (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf; + sg_size = iser_task->data[ISER_DIR_IN].size; p = mem; for_each_sg(sgl, sg, sg_size, i) { @@ -200,7 +200,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, } } - cmd_data_len = iser_ctask->data[cmd_dir].data_len; + cmd_data_len = iser_task->data[cmd_dir].data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) free_pages((unsigned long)mem_copy->copy_buf, @@ -378,15 +378,15 @@ static void iser_page_vec_build(struct iser_data_buf *data, } } -int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, - struct iser_data_buf *data, - enum iser_data_dir iser_dir, - enum dma_data_direction dma_dir) +int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data, + enum iser_data_dir 
iser_dir, + enum dma_data_direction dma_dir) { struct ib_device *dev; - iser_ctask->dir[iser_dir] = 1; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + iser_task->dir[iser_dir] = 1; + dev = iser_task->iser_conn->ib_conn->device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); if (data->dma_nents == 0) { @@ -396,20 +396,20 @@ int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, return 0; } -void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task) { struct ib_device *dev; struct iser_data_buf *data; - dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + dev = iser_task->iser_conn->ib_conn->device->ib_device; - if (iser_ctask->dir[ISER_DIR_IN]) { - data = &iser_ctask->data[ISER_DIR_IN]; + if (iser_task->dir[ISER_DIR_IN]) { + data = &iser_task->data[ISER_DIR_IN]; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } - if (iser_ctask->dir[ISER_DIR_OUT]) { - data = &iser_ctask->data[ISER_DIR_OUT]; + if (iser_task->dir[ISER_DIR_OUT]) { + data = &iser_task->data[ISER_DIR_OUT]; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE); } } @@ -420,21 +420,21 @@ void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) * * returns 0 on success, errno code on failure */ -int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, +int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iscsi_conn *iscsi_conn = iser_ctask->iser_conn->iscsi_conn; - struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; + struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; + struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; - struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; + struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_regd_buf *regd_buf; int 
aligned_len; int err; int i; struct scatterlist *sg; - regd_buf = &iser_ctask->rdma_regd[cmd_dir]; + regd_buf = &iser_task->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { @@ -444,13 +444,13 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_ctask); + iser_dma_unmap_task_data(iser_task); /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ - if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0) + if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) return -ENOMEM; - mem = &iser_ctask->data_copy[cmd_dir]; + mem = &iser_task->data_copy[cmd_dir]; } /* if there a single dma entry, FMR is not needed */ @@ -474,8 +474,9 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg); if (err) { iser_data_buf_dump(mem, ibdev); - iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, - ntoh24(iser_ctask->desc.iscsi_header.dlength)); + iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", + mem->dma_nents, + ntoh24(iser_task->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", ib_conn->page_vec->data_size, ib_conn->page_vec->length, ib_conn->page_vec->offset); -- cgit v1.2.3 From 7970634b81a6e3561954517bca42615542c4535b Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:12 -0500 Subject: [SCSI] iscsi class: use device_for_each_child instead of duplicating session list Currently we duplicate the list of sessions, because we were using the test for if a session was on the host list to indicate if the session was bound or unbound. We can instead use the target_id and fix up the class so that drivers like bnx2i do not have to manage the target id space.
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 86d9c42f0d33..3a89039e9a96 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -437,7 +437,7 @@ iscsi_iser_session_create(struct Scsi_Host *shost, cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, ISCSI_DEF_XMIT_CMDS_MAX, sizeof(struct iscsi_iser_task), - initial_cmdsn); + initial_cmdsn, 0); if (!cls_session) goto remove_host; session = cls_session->dd_data; -- cgit v1.2.3 From 412eeafa0a51a8d86545d0be637bf84e4374fccf Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:14 -0500 Subject: [SCSI] iser: Modify iser to take a iscsi_endpoint struct in ep callouts and session setup This hooks iser into the iscsi endpoint code. Previously it handled the lookup and allocation. This has been made generic so bnx2i and iser can share it. It also allows us to pass iser the leading conn's ep, so we know the ib_device being used and can set it as the scsi_host's parent. And that allows scsi-ml to set the dma_mask based on those values.
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 87 ++++++++++++++------------------ drivers/infiniband/ulp/iser/iscsi_iser.h | 4 +- drivers/infiniband/ulp/iser/iser_verbs.c | 14 +---- 3 files changed, 43 insertions(+), 62 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 3a89039e9a96..42e95b833092 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -262,24 +262,6 @@ iscsi_iser_cleanup_task(struct iscsi_conn *conn, struct iscsi_task *task) } } -static struct iser_conn * -iscsi_iser_ib_conn_lookup(__u64 ep_handle) -{ - struct iser_conn *ib_conn; - struct iser_conn *uib_conn = (struct iser_conn *)(unsigned long)ep_handle; - - mutex_lock(&ig.connlist_mutex); - list_for_each_entry(ib_conn, &ig.connlist, conn_list) { - if (ib_conn == uib_conn) { - mutex_unlock(&ig.connlist_mutex); - return ib_conn; - } - } - mutex_unlock(&ig.connlist_mutex); - iser_err("no conn exists for eph %llx\n",(unsigned long long)ep_handle); - return NULL; -} - static struct iscsi_cls_conn * iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) { @@ -335,6 +317,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_iser_conn *iser_conn; struct iser_conn *ib_conn; + struct iscsi_endpoint *ep; int error; error = iscsi_conn_bind(cls_session, cls_conn, is_leading); @@ -343,12 +326,14 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, /* the transport ep handle comes from user space so it must be * verified against the global ib connections list */ - ib_conn = iscsi_iser_ib_conn_lookup(transport_eph); - if (!ib_conn) { + ep = iscsi_lookup_endpoint(transport_eph); + if (!ep) { iser_err("can't bind eph %llx\n", (unsigned long long)transport_eph); return -EINVAL; } + ib_conn = ep->dd_data; 
+ /* binds the iSER connection retrieved from the previously * connected ep_handle to the iSCSI layer connection. exchanges * connection pointers */ @@ -401,21 +386,17 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) } static struct iscsi_cls_session * -iscsi_iser_session_create(struct Scsi_Host *shost, +iscsi_iser_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, uint16_t qdepth, uint32_t initial_cmdsn, uint32_t *hostno) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; + struct Scsi_Host *shost; int i; struct iscsi_task *task; struct iscsi_iser_task *iser_task; - - if (shost) { - printk(KERN_ERR "iscsi_tcp: invalid shost %d.\n", - shost->host_no); - return NULL; - } + struct iser_conn *ib_conn; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, ISCSI_MAX_CMD_PER_LUN); if (!shost) @@ -426,7 +407,15 @@ iscsi_iser_session_create(struct Scsi_Host *shost, shost->max_channel = 0; shost->max_cmd_len = 16; - if (iscsi_host_add(shost, NULL)) + /* + * older userspace tools (before 2.0-870) did not pass us + * the leading conn's ep so this will be NULL; + */ + if (ep) + ib_conn = ep->dd_data; + + if (iscsi_host_add(shost, + ep ? 
ib_conn->device->ib_device->dma_device : NULL)) goto free_host; *hostno = shost->host_no; @@ -529,34 +518,37 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s stats->custom[3].value = conn->fmr_unalign_cnt; } -static int -iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking, - __u64 *ep_handle) +static struct iscsi_endpoint * +iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking) { int err; struct iser_conn *ib_conn; + struct iscsi_endpoint *ep; - err = iser_conn_init(&ib_conn); - if (err) - goto out; + ep = iscsi_create_endpoint(sizeof(*ib_conn)); + if (!ep) + return ERR_PTR(-ENOMEM); - err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, non_blocking); - if (!err) - *ep_handle = (__u64)(unsigned long)ib_conn; + ib_conn = ep->dd_data; + ib_conn->ep = ep; + iser_conn_init(ib_conn); -out: - return err; + err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, + non_blocking); + if (err) { + iscsi_destroy_endpoint(ep); + return ERR_PTR(err); + } + return ep; } static int -iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms) +iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) { - struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); + struct iser_conn *ib_conn; int rc; - if (!ib_conn) - return -EINVAL; - + ib_conn = ep->dd_data; rc = wait_event_interruptible_timeout(ib_conn->wait, ib_conn->state == ISER_CONN_UP, msecs_to_jiffies(timeout_ms)); @@ -578,14 +570,11 @@ iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms) } static void -iscsi_iser_ep_disconnect(__u64 ep_handle) +iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) { struct iser_conn *ib_conn; - ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); - if (!ib_conn) - return; - + ib_conn = ep->dd_data; if (ib_conn->iser_conn) /* * Must suspend xmit path if the ep is bound to the diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 05431f270fe8..cdf48763b082 100644 
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -174,6 +174,7 @@ struct iser_data_buf { struct iser_device; struct iscsi_iser_conn; struct iscsi_iser_task; +struct iscsi_endpoint; struct iser_mem_reg { u32 lkey; @@ -241,6 +242,7 @@ struct iser_device { struct iser_conn { struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ + struct iscsi_endpoint *ep; enum iser_ib_conn_state state; /* rdma connection state */ atomic_t refcount; spinlock_t lock; /* used for state changes */ @@ -313,7 +315,7 @@ void iscsi_iser_recv(struct iscsi_conn *conn, char *rx_data, int rx_data_len); -int iser_conn_init(struct iser_conn **ib_conn); +void iser_conn_init(struct iser_conn *ib_conn); void iser_conn_get(struct iser_conn *ib_conn); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 5daed2bd710e..81b45d4d9aa9 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -325,7 +325,7 @@ static void iser_conn_release(struct iser_conn *ib_conn) iser_device_try_release(device); if (ib_conn->iser_conn) ib_conn->iser_conn->ib_conn = NULL; - kfree(ib_conn); + iscsi_destroy_endpoint(ib_conn->ep); } void iser_conn_get(struct iser_conn *ib_conn) @@ -494,15 +494,8 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve return ret; } -int iser_conn_init(struct iser_conn **ibconn) +void iser_conn_init(struct iser_conn *ib_conn) { - struct iser_conn *ib_conn; - - ib_conn = kzalloc(sizeof *ib_conn, GFP_KERNEL); - if (!ib_conn) { - iser_err("can't alloc memory for struct iser_conn\n"); - return -ENOMEM; - } ib_conn->state = ISER_CONN_INIT; init_waitqueue_head(&ib_conn->wait); atomic_set(&ib_conn->post_recv_buf_count, 0); @@ -510,9 +503,6 @@ int iser_conn_init(struct iser_conn **ibconn) atomic_set(&ib_conn->refcount, 1); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); - - *ibconn = ib_conn; - return 0; 
} /** -- cgit v1.2.3 From 88dfd340b9dece8fcaa1a2d4c782338926c017f7 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:16 -0500 Subject: [SCSI] iscsi class: Add session initiatorname and ifacename sysfs attrs. This adds two new attrs used for creating initiator ports and binding sessions to hardware. The session level initiatorname: Since bnx2i does a scsi_host per host device, we need to add the iface initiator port settings on the session, so we can create multiple initiator ports (each with different inames) per device/scsi_host. The current iname reflects that qla4xxx can have one iname per hba, and we are allocating a host per session for software. The iname on the host will remain so we can export and set the hba level qla4xxx setting. The ifacename attr: To bind a session to some piece of hardware in userspace we maintain some mappings, but during boot or iscsid restart (iscsid contains the user space part of the driver) we need to be able to figure out which of those host mapping abstractions map to certain sessions. This patch adds an ifacename attr, which userspace can set to id the host side of the endpoint across pivot_roots and iscsid restarts.
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 42e95b833092..08edbaf89223 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -629,7 +629,8 @@ static struct iscsi_transport iscsi_iser_transport = { ISCSI_USERNAME | ISCSI_PASSWORD | ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN | ISCSI_FAST_ABORT | ISCSI_ABORT_TMO | - ISCSI_PING_TMO | ISCSI_RECV_TMO, + ISCSI_PING_TMO | ISCSI_RECV_TMO | + ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME, .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_NETDEV_NAME | ISCSI_HOST_INITIATOR_NAME, -- cgit v1.2.3 From 913e5bf435617aa529919a4f7567f849f9f35f9f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 21 May 2008 15:54:18 -0500 Subject: [SCSI] libiscsi, iser, tcp: remove recv_lock The recv lock was defined so the iscsi layer could block the recv path from processing IO during recovery. It turns out iser just set a lock to that pointer which was pointless. We now disconnect the transport connection before doing recovery so we do not need the recv lock. For iscsi_tcp we still stop the recv path in case older tools are being used. This patch also has iscsi_itt_to_ctask user grab the session lock and has the caller access the task with the lock or get a ref to it in case the target is broken and sends a tmf success response then sends data or a response for the command that was supposed to be affected by the tmf.
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 20 ++++++++++---------- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 -- drivers/infiniband/ulp/iser/iser_initiator.c | 6 ++++++ 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 08edbaf89223..c02eabd383a1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -281,9 +281,6 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) conn->max_recv_dlength = 128; iser_conn = conn->dd_data; - /* currently this is the only field which need to be initiated */ - rwlock_init(&iser_conn->lock); - conn->dd_data = iser_conn; iser_conn->iscsi_conn = conn; @@ -342,9 +339,6 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, ib_conn->iser_conn = iser_conn; iser_conn->ib_conn = ib_conn; iser_conn_get(ib_conn); - - conn->recv_lock = &iser_conn->lock; - return 0; } @@ -355,12 +349,18 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) struct iscsi_iser_conn *iser_conn = conn->dd_data; struct iser_conn *ib_conn = iser_conn->ib_conn; - iscsi_conn_stop(cls_conn, flag); /* - * There is no unbind event so the stop callback - * must release the ref from the bind. + * Userspace may have goofed up and not bound the connection or + * might have only partially setup the connection. */ - iser_conn_put(ib_conn); + if (ib_conn) { + iscsi_conn_stop(cls_conn, flag); + /* + * There is no unbind event so the stop callback + * must release the ref from the bind. 
+ */ + iser_conn_put(ib_conn); + } iser_conn->ib_conn = NULL; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index cdf48763b082..a547edeea969 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -263,8 +263,6 @@ struct iser_conn { struct iscsi_iser_conn { struct iscsi_conn *iscsi_conn;/* ptr to iscsi conn */ struct iser_conn *ib_conn; /* iSER IB conn */ - - rwlock_t lock; }; struct iscsi_iser_task { diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 35af60a23c61..c36083922134 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -558,7 +558,12 @@ void iser_rcv_completion(struct iser_desc *rx_desc, opcode = hdr->opcode & ISCSI_OPCODE_MASK; if (opcode == ISCSI_OP_SCSI_CMD_RSP) { + spin_lock(&conn->iscsi_conn->session->lock); task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt); + if (task) + __iscsi_get_task(task); + spin_unlock(&conn->iscsi_conn->session->lock); + if (!task) iser_err("itt can't be matched to task!!! " "conn %p opcode %d itt %d\n", @@ -568,6 +573,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc, iser_dbg("itt %d task %p\n",hdr->itt, task); iser_task->status = ISER_TASK_STATUS_COMPLETED; iser_task_rdma_finalize(iser_task); + iscsi_put_task(task); } } iser_dto_buffs_release(dto); -- cgit v1.2.3 From 8e9a20cee4511be4560f9c858d9994eb6913731e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 16 Jun 2008 10:11:33 -0500 Subject: [SCSI] libiscsi, iscsi_tcp, ib_iser: fix setting of can_queue with old tools. This patch fixes two bugs that are related. 1. Old tools did not set can_queue/cmds_max. This patch modifies libiscsi so that when we add the host we catch this and set it to the default. 2. iscsi_tcp thought that the scsi command that was passed to the eh functions needed a iscsi_cmd_task allocated for it. 
It only needed a mgmt task, and now it does not matter since it all comes from the same pool and libiscsi handles this for the drivers. ib_iser had copied iscsi_tcp's code and set can_queue to its max - 1 to handle this. So this patch removes the max -1, and just sets it to the max. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index c02eabd383a1..a56931e03976 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -595,7 +595,6 @@ static struct scsi_host_template iscsi_iser_sht = { .name = "iSCSI Initiator over iSER, v." DRV_VER, .queuecommand = iscsi_queuecommand, .change_queue_depth = iscsi_change_queue_depth, - .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, .max_sectors = 1024, .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, -- cgit v1.2.3 From 969a60f9db3f879f95bd37026a3c3bf02cc2568f Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:43 -0700 Subject: IB/srp: Remove use of cached P_Key/GID queries The SRP initiator is currently using ib_find_cached_pkey() and ib_get_cached_gid() in situations where the uncached ib_find_pkey() and ib_query_gid() functions serve just as well: sleeping is allowed and performance is not an issue. Since we want to eliminate the cached operations in the long term, convert SRP to use the uncached variants. 
Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 435145709dd6..81cc59ca5595 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -49,8 +49,6 @@ #include #include -#include - #include "ib_srp.h" #define DRV_NAME "ib_srp" @@ -183,10 +181,10 @@ static int srp_init_qp(struct srp_target_port *target, if (!attr) return -ENOMEM; - ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, - target->srp_host->port, - be16_to_cpu(target->path.pkey), - &attr->pkey_index); + ret = ib_find_pkey(target->srp_host->srp_dev->dev, + target->srp_host->port, + be16_to_cpu(target->path.pkey), + &attr->pkey_index); if (ret) goto out; @@ -1883,8 +1881,7 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err; - ib_get_cached_gid(host->srp_dev->dev, host->port, 0, - &target->path.sgid); + ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid); shost_printk(KERN_DEBUG, target->scsi_host, PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " -- cgit v1.2.3 From f3781d2e89f12dd5afa046dc56032af6e39bd116 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:44 -0700 Subject: RDMA: Remove subversion $Id tags They don't get updated by git and so they're worse than useless. 
Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 -- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 2 -- drivers/infiniband/ulp/iser/iscsi_iser.c | 3 --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 -- drivers/infiniband/ulp/iser/iser_initiator.c | 2 -- drivers/infiniband/ulp/iser/iser_memory.c | 2 -- drivers/infiniband/ulp/iser/iser_verbs.c | 2 -- drivers/infiniband/ulp/srp/ib_srp.c | 2 -- drivers/infiniband/ulp/srp/ib_srp.h | 2 -- 15 files changed, 31 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca126fc2b853..0dcbab3203c9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $ */ #ifndef _IPOIB_H diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 97e67d36378f..91c959299910 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
- * - * $Id$ */ #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 8b882bbd1d05..961c585da216 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $ */ #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f429bce24c20..eca8518d79a0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $ */ #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2442090ac8d1..f217b1edd0ac 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $ */ #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 3f663fb852c1..4a6538b9301a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
- * - * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ */ #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 8766d29ce3b7..810790ae7530 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ */ #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 1cdb5cfb0ff1..b08eb56196d3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index aeb58cae9a3f..356fac6d105a 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -42,9 +42,6 @@ * Zhenyu Wang * Modified by: * Erez Zilber - * - * - * $Id: iscsi_iser.c 6965 2006-05-07 11:36:20Z ogerlitz $ */ #include diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a8c1b300e34d..0e10703cf59e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -36,8 +36,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
- * - * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $ */ #ifndef __ISCSI_ISER_H__ #define __ISCSI_ISER_H__ diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 08dc81c46f41..31ad498bdc51 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $ */ #include #include diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index cac50c4dc159..81e49cb10ed3 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_memory.c 6964 2006-05-07 11:11:43Z ogerlitz $ */ #include #include diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index d19cfe605ebb..77cabee7cc08 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $ */ #include #include diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 81cc59ca5595..ed7c5f72cb8b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
- * - * $Id: ib_srp.c 3932 2005-11-01 17:19:29Z roland $ */ #include diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 63d2ae724061..e185b907fc12 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_srp.h 3932 2005-11-01 17:19:29Z roland $ */ #ifndef IB_SRP_H -- cgit v1.2.3 From f89271da32bc1a636cf4eb078e615930886cd013 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:44 -0700 Subject: IPoIB: Copy small received SKBs in connected mode The connected mode implementation in the IPoIB driver has a large overhead in the way SKBs are handled in the receive flow. It usually allocates an SKB as big as was used in the currently received SKB and moves unused fragments from the old SKB to the new one. This involves a loop on all the remaining fragments and incurs overhead on the CPU. This patch, for small SKBs, allocates an SKB just large enough to contain the received data and copies to it the data from the received SKB. The newly allocated SKB is passed to the stack and the old SKB is reposted. When running netperf, UDP small messages, without this patch I get: UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 14.4.3.178 (14.4.3.178) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 114688 128 10.00 5142034 0 526.31 114688 10.00 1130489 115.71 With this patch I get both send and receive at ~315 mbps. The reason that send performance actually slows down is as follows: When using this patch, the overhead of the CPU for handling RX packets is dramatically reduced.
As a result, we do not experience RNR NAK messages from the receiver which cause the connection to be closed and reopened again; when the patch is not used, the receiver cannot handle the packets fast enough so there is less time to post new buffers and hence the mentioned RNR NACKs. So what happens is that the application *thinks* it posted a certain number of packets for transmission but these packets are flushed and do not really get transmitted. Since the connection gets opened and closed many times, each time netperf gets the CPU time that otherwise would have been given to IPoIB to actually transmit the packets. This can be verified when looking at the port counters -- the output of ifconfig and the output of netperf (this is for the case without the patch): tx packets ========== port counter: 1,543,996 ifconfig: 1,581,426 netperf: 5,142,034 rx packets ========== netperf 1,130,489 Signed-off-by: Eli Cohen --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + drivers/infiniband/ulp/ipoib/ipoib_cm.c | 19 +++++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 6 ++++++ 3 files changed, 26 insertions(+) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 0dcbab3203c9..8754b364f229 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -95,6 +95,7 @@ enum { IPOIB_MCAST_FLAG_ATTACHED = 3, MAX_SEND_CQE = 16, + IPOIB_CM_COPYBREAK = 256, }; #define IPOIB_OP_RECV (1ul << 31) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 91c959299910..6223fc39af70 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -523,6 +523,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) u64 mapping[IPOIB_CM_RX_SG]; int frags; int has_srq; + struct sk_buff *small_skb; ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", wr_id,
wc->status); @@ -577,6 +578,23 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } } + if (wc->byte_len < IPOIB_CM_COPYBREAK) { + int dlen = wc->byte_len; + + small_skb = dev_alloc_skb(dlen + 12); + if (small_skb) { + skb_reserve(small_skb, 12); + ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0], + dlen, DMA_FROM_DEVICE); + skb_copy_from_linear_data(skb, small_skb->data, dlen); + ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0], + dlen, DMA_FROM_DEVICE); + skb_put(small_skb, dlen); + skb = small_skb; + goto copied; + } + } + frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; @@ -599,6 +617,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); +copied: skb->protocol = ((struct ipoib_header *) skb->data)->proto; skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f217b1edd0ac..bfe1dbf99207 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1302,6 +1302,12 @@ static int __init ipoib_init_module(void) ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif + /* + * When copying small received packets, we only copy from the + * linear data part of the SKB, so we rely on this condition. 
+ */ + BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE); + ret = ipoib_register_debugfs(); if (ret) return ret; -- cgit v1.2.3 From a7d834c4bc6be73e8f83eaa5072fac3c5549f7f2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:47 -0700 Subject: IPoIB/cm: Fix racy use of receive WR/SGL in ipoib_cm_post_receive_nonsrq() For devices that don't support SRQs, ipoib_cm_post_receive_nonsrq() is called from both ipoib_cm_handle_rx_wc() and ipoib_cm_nonsrq_init_rx(), and these two callers are not synchronized against each other. However, ipoib_cm_post_receive_nonsrq() always reuses the same receive work request and scatter list structures, so multiple callers can end up stepping on each other, which leads to posting garbled work requests. Fix this by having the caller pass in the ib_recv_wr and ib_sge structures to use, and allocating new local structures in ipoib_cm_nonsrq_init_rx(). Based on a patch by Pradeep Satyanarayana and David Wilder , with debugging help from Hoang-Nam Nguyen . 
Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 63 ++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6223fc39af70..37bf67b2a26f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -111,18 +111,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) } static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, - struct ipoib_cm_rx *rx, int id) + struct ipoib_cm_rx *rx, + struct ib_recv_wr *wr, + struct ib_sge *sge, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_recv_wr *bad_wr; int i, ret; - priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; + wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; for (i = 0; i < IPOIB_CM_RX_SG; ++i) - priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i]; + sge[i].addr = rx->rx_ring[id].mapping[i]; - ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr); + ret = ib_post_recv(rx->qp, wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, @@ -320,10 +322,33 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev, return 0; } +static void ipoib_cm_init_rx_wr(struct net_device *dev, + struct ib_recv_wr *wr, + struct ib_sge *sge) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < priv->cm.num_frags; ++i) + sge[i].lkey = priv->mr->lkey; + + sge[0].length = IPOIB_CM_HEAD_SIZE; + for (i = 1; i < priv->cm.num_frags; ++i) + sge[i].length = PAGE_SIZE; + + wr->next = NULL; + wr->sg_list = priv->cm.rx_sge; + wr->num_sge = priv->cm.num_frags; +} + static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) { struct ipoib_dev_priv *priv = netdev_priv(dev); + struct { + struct ib_recv_wr wr; + 
struct ib_sge sge[IPOIB_CM_RX_SG]; + } *t; int ret; int i; @@ -331,6 +356,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i if (!rx->rx_ring) return -ENOMEM; + t = kmalloc(sizeof *t, GFP_KERNEL); + if (!t) { + ret = -ENOMEM; + goto err_free; + } + + ipoib_cm_init_rx_wr(dev, &t->wr, t->sge); + spin_lock_irq(&priv->lock); if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { @@ -349,8 +382,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); ret = -ENOMEM; goto err_count; - } - ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); + } + ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i); if (ret) { ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " "failed for buf %d\n", i); @@ -361,6 +394,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i rx->recv_count = ipoib_recvq_size; + kfree(t); + return 0; err_count: @@ -369,6 +404,7 @@ err_count: spin_unlock_irq(&priv->lock); err_free: + kfree(t); ipoib_cm_free_rx_ring(dev, rx->rx_ring); return ret; @@ -637,7 +673,10 @@ repost: ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " "for buf %d\n", wr_id); } else { - if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { + if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, + &priv->cm.rx_wr, + priv->cm.rx_sge, + wr_id))) { --p->recv_count; ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " "for buf %d\n", wr_id); @@ -1502,15 +1541,7 @@ int ipoib_cm_dev_init(struct net_device *dev) priv->cm.num_frags = IPOIB_CM_RX_SG; } - for (i = 0; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].lkey = priv->mr->lkey; - - priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; - for (i = 1; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].length = PAGE_SIZE; - priv->cm.rx_wr.next = NULL; - priv->cm.rx_wr.sg_list = priv->cm.rx_sge; - priv->cm.rx_wr.num_sge = priv->cm.num_frags; + ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, 
priv->cm.rx_sge); if (ipoib_cm_has_srq(dev)) { for (i = 0; i < ipoib_recvq_size; ++i) { -- cgit v1.2.3 From 12406734051a26e9fe4c8568e931dfddbb72d431 Mon Sep 17 00:00:00 2001 From: Ron Livne Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: IPoIB: Use multicast loopback blocking if available Set IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK for IPoIB's UD QPs if supported by the underlying device. This creates an improvement of up to 39% in bandwidth when sending multicast packets with IPoIB, and an improvement of 12% in cpu usage. Signed-off-by: Ron Livne Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 810790ae7530..7b8fa36f509b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -199,7 +199,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.recv_cq = priv->recv_cq; if (priv->hca_caps & IB_DEVICE_UD_TSO) - init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; + init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + + if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) + init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; if (dev->features & NETIF_F_SG) init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; -- cgit v1.2.3 From af40da894e96d5c826d38be3ea53ee00d9de0367 Mon Sep 17 00:00:00 2001 From: Vladimir Sokolovsky Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: IPoIB: add LRO support Add "ipoib_use_lro" module parameter to enable LRO and an "ipoib_lro_max_aggr" module parameter to set the max number of packets to be aggregated. Make LRO controllable and LRO statistics accessible through ethtool.
Signed-off-by: Vladimir Sokolovsky Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/Kconfig | 1 + drivers/infiniband/ulp/ipoib/ipoib.h | 11 +++++ drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 46 +++++++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 +++- drivers/infiniband/ulp/ipoib/ipoib_main.c | 62 ++++++++++++++++++++++++++++ 5 files changed, 127 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 1f76bad020f3..691525cf394a 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_IPOIB tristate "IP-over-InfiniBand" depends on NETDEVICES && INET && (IPV6 || IPV6=n) + select INET_LRO ---help--- Support for the IP-over-InfiniBand protocol (IPoIB). This transports IP packets over InfiniBand so you can use your IB diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 8754b364f229..2c522572e3c5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -50,6 +50,7 @@ #include #include #include +#include /* constants */ @@ -94,6 +95,9 @@ enum { IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + IPOIB_MAX_LRO_DESCRIPTORS = 8, + IPOIB_LRO_MAX_AGGR = 64, + MAX_SEND_CQE = 16, IPOIB_CM_COPYBREAK = 256, }; @@ -248,6 +252,11 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_lro { + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS]; +}; + /* * Device private locking: tx_lock protects members used in TX fast * path (and we use LLTX so upper layers don't do extra locking). 
@@ -334,6 +343,8 @@ struct ipoib_dev_priv { int hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; + + struct ipoib_lro lro; }; struct ipoib_ah { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 10279b79c44d..66af5c1a76e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev, return 0; } +static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = { + "LRO aggregated", "LRO flushed", + "LRO avg aggr", "LRO no desc" +}; + +static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + switch (stringset) { + case ETH_SS_STATS: + memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys)); + break; + } +} + +static int ipoib_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ipoib_stats_keys); + default: + return -EOPNOTSUPP; + } +} + +static void ipoib_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, uint64_t *data) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int index = 0; + + /* Get LRO statistics */ + data[index++] = priv->lro.lro_mgr.stats.aggregated; + data[index++] = priv->lro.lro_mgr.stats.flushed; + if (priv->lro.lro_mgr.stats.flushed) + data[index++] = priv->lro.lro_mgr.stats.aggregated / + priv->lro.lro_mgr.stats.flushed; + else + data[index++] = 0; + data[index++] = priv->lro.lro_mgr.stats.no_desc; +} + static const struct ethtool_ops ipoib_ethtool_ops = { .get_drvinfo = ipoib_get_drvinfo, .get_tso = ethtool_op_get_tso, .get_coalesce = ipoib_get_coalesce, .set_coalesce = ipoib_set_coalesce, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, + .get_strings = ipoib_get_strings, + .get_sset_count = ipoib_get_sset_count, + .get_ethtool_stats = ipoib_get_ethtool_stats, }; void ipoib_set_ethtool_ops(struct net_device 
*dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index eca8518d79a0..5d50e5261eed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -288,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) skb->ip_summed = CHECKSUM_UNNECESSARY; - netif_receive_skb(skb); + if (dev->features & NETIF_F_LRO) + lro_receive_skb(&priv->lro.lro_mgr, skb, NULL); + else + netif_receive_skb(skb); repost: if (unlikely(ipoib_ib_post_receive(dev, wr_id))) @@ -440,6 +443,9 @@ poll_more: } if (done < budget) { + if (dev->features & NETIF_F_LRO) + lro_flush_all(&priv->lro.lro_mgr); + netif_rx_complete(dev, napi); if (unlikely(ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index bfe1dbf99207..fead88f7fb17 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -60,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); +static int lro; +module_param(lro, bool, 0444); +MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)"); + +static int lro_max_aggr = IPOIB_LRO_MAX_AGGR; +module_param(lro_max_aggr, int, 0644); +MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated " + "(default = 64)"); + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level; @@ -936,6 +945,54 @@ static const struct header_ops ipoib_header_ops = { .create = ipoib_hard_header, }; +static int get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + unsigned int ip_len; + struct iphdr *iph; + + if (unlikely(skb->protocol != htons(ETH_P_IP))) + return 
-1; + + /* + * In the future we may add an else clause that verifies the + * checksum and allows devices which do not calculate checksum + * to use LRO. + */ + if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY)) + return -1; + + /* Check for non-TCP packet */ + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + /* check if IP header and TCP header are complete */ + if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb)) + return -1; + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + + return 0; +} + +static void ipoib_lro_setup(struct ipoib_dev_priv *priv) +{ + priv->lro.lro_mgr.max_aggr = lro_max_aggr; + priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS; + priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc; + priv->lro.lro_mgr.get_skb_header = get_skb_hdr; + priv->lro.lro_mgr.features = LRO_F_NAPI; + priv->lro.lro_mgr.dev = priv->dev; + priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; +} + static void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -975,6 +1032,8 @@ static void ipoib_setup(struct net_device *dev) priv->dev = dev; + ipoib_lro_setup(priv); + spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); @@ -1152,6 +1211,9 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; } + if (lro) + priv->dev->features |= NETIF_F_LRO; + /* * Set the full membership bit, so that we join the right * broadcast group, etc. -- cgit v1.2.3 From ee1e2c82c245a5fb2864e9dbcdaab3390fde3fcc Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Mon, 14 Jul 2008 23:48:49 -0700 Subject: IPoIB: Refresh paths instead of flushing them on SM change events The patch tries to solve the problem of device going down and paths being flushed on an SM change event. 
The method is to mark the paths as candidates for refresh (by setting the new valid flag to 0), and wait for an ARP probe to trigger a new path record query. The solution requires a different and less intrusive handling of SM change event. For that, the second argument of the flush function changes its meaning from a boolean flag to a level. In most cases, SM failover doesn't cause LID change so traffic won't stop. In the rare cases of LID change, the remote host (the one that hadn't changed its LID) will lose connectivity until paths are refreshed. This is no worse than the current state. In fact, preventing the device from going down saves packets that otherwise would be lost. Signed-off-by: Moni Levy Signed-off-by: Moni Shoua Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 17 ++++++++++-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 42 ++++++++++++++++++---------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 44 +++++++++++++++++++++++++++--- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 18 ++++++------ 4 files changed, 91 insertions(+), 30 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 2c522572e3c5..bb19587c5eaf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -54,6 +54,12 @@ /* constants */ +enum ipoib_flush_level { + IPOIB_FLUSH_LIGHT, + IPOIB_FLUSH_NORMAL, + IPOIB_FLUSH_HEAVY +}; + enum { IPOIB_ENCAP_LEN = 4, @@ -284,10 +290,11 @@ struct ipoib_dev_priv { struct delayed_work pkey_poll_task; struct delayed_work mcast_task; - struct work_struct flush_task; + struct work_struct flush_light; + struct work_struct flush_normal; + struct work_struct flush_heavy; struct work_struct restart_task; struct delayed_work ah_reap_task; - struct work_struct pkey_event_task; struct ib_device *ca; u8 port; @@ -369,6 +376,7 @@ struct ipoib_path { struct rb_node rb_node; struct list_head list; + int valid; }; struct ipoib_neigh
{ @@ -433,11 +441,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); -void ipoib_ib_dev_flush(struct work_struct *work); +void ipoib_ib_dev_flush_light(struct work_struct *work); +void ipoib_ib_dev_flush_normal(struct work_struct *work); +void ipoib_ib_dev_flush_heavy(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5d50e5261eed..66cafa20c246 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -902,7 +902,8 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) return 0; } -static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) +static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, + enum ipoib_flush_level level) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; @@ -915,7 +916,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) * the parent is down. 
*/ list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, pkey_event); + __ipoib_ib_dev_flush(cpriv, level); mutex_unlock(&priv->vlan_mutex); @@ -929,7 +930,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) return; } - if (pkey_event) { + if (level == IPOIB_FLUSH_HEAVY) { if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ipoib_ib_dev_down(dev, 0); @@ -947,11 +948,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) priv->pkey_index = new_index; } - ipoib_dbg(priv, "flushing\n"); + if (level == IPOIB_FLUSH_LIGHT) { + ipoib_mark_paths_invalid(dev); + ipoib_mcast_dev_flush(dev); + } - ipoib_ib_dev_down(dev, 0); + if (level >= IPOIB_FLUSH_NORMAL) + ipoib_ib_dev_down(dev, 0); - if (pkey_event) { + if (level == IPOIB_FLUSH_HEAVY) { ipoib_ib_dev_stop(dev, 0); ipoib_ib_dev_open(dev); } @@ -961,27 +966,34 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) * we get here, don't bring it back up if it's not configured up */ if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { - ipoib_ib_dev_up(dev); + if (level >= IPOIB_FLUSH_NORMAL) + ipoib_ib_dev_up(dev); ipoib_mcast_restart_task(&priv->restart_task); } } -void ipoib_ib_dev_flush(struct work_struct *work) +void ipoib_ib_dev_flush_light(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, flush_light); + + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); +} + +void ipoib_ib_dev_flush_normal(struct work_struct *work) { struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, flush_task); + container_of(work, struct ipoib_dev_priv, flush_normal); - ipoib_dbg(priv, "Flushing %s\n", priv->dev->name); - __ipoib_ib_dev_flush(priv, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); } -void ipoib_pkey_event(struct work_struct *work) +void ipoib_ib_dev_flush_heavy(struct work_struct 
*work) { struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, pkey_event_task); + container_of(work, struct ipoib_dev_priv, flush_heavy); - ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name); - __ipoib_ib_dev_flush(priv, 1); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); } void ipoib_ib_dev_cleanup(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index fead88f7fb17..b3fd7e8333cf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -357,6 +357,23 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ +void ipoib_mark_paths_invalid(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_path *path, *tp; + + spin_lock_irq(&priv->lock); + + list_for_each_entry_safe(path, tp, &priv->path_list, list) { + ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n", + be16_to_cpu(path->pathrec.dlid), + IPOIB_GID_ARG(path->pathrec.dgid)); + path->valid = 0; + } + + spin_unlock_irq(&priv->lock); +} + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -393,6 +410,7 @@ static void path_rec_completion(int status, struct net_device *dev = path->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah = NULL; + struct ipoib_ah *old_ah; struct ipoib_neigh *neigh, *tn; struct sk_buff_head skqueue; struct sk_buff *skb; @@ -416,6 +434,7 @@ static void path_rec_completion(int status, spin_lock_irqsave(&priv->lock, flags); + old_ah = path->ah; path->ah = ah; if (ah) { @@ -428,6 +447,17 @@ static void path_rec_completion(int status, __skb_queue_tail(&skqueue, skb); list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { + if (neigh->ah) { + WARN_ON(neigh->ah != old_ah); + /* + * Dropping the ah reference inside + * priv->lock is safe here, because we + * 
will hold one more reference from + * the original value of path->ah (ie + * old_ah). + */ + ipoib_put_ah(neigh->ah); + } kref_get(&path->ah->ref); neigh->ah = path->ah; memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, @@ -450,6 +480,7 @@ static void path_rec_completion(int status, while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } + path->valid = 1; } path->query = NULL; @@ -457,6 +488,9 @@ static void path_rec_completion(int status, spin_unlock_irqrestore(&priv->lock, flags); + if (old_ah) + ipoib_put_ah(old_ah); + while ((skb = __skb_dequeue(&skqueue))) { skb->dev = dev; if (dev_queue_xmit(skb)) @@ -630,8 +664,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_lock(&priv->lock); path = __path_find(dev, phdr->hwaddr + 4); - if (!path) { - path = path_rec_create(dev, phdr->hwaddr + 4); + if (!path || !path->valid) { + if (!path) + path = path_rec_create(dev, phdr->hwaddr + 4); if (path) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof *phdr); @@ -1046,9 +1081,10 @@ static void ipoib_setup(struct net_device *dev) INIT_LIST_HEAD(&priv->multicast_list); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); - INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); - INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); + INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); + INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); + INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 7b8fa36f509b..96f9aa79cbbe 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -290,15 +290,17 @@ void ipoib_event(struct ib_event_handler *handler, if 
(record->element.port_num != priv->port) return; - if (record->event == IB_EVENT_PORT_ERR || - record->event == IB_EVENT_PORT_ACTIVE || - record->event == IB_EVENT_LID_CHANGE || - record->event == IB_EVENT_SM_CHANGE || + ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event, + record->device->name, record->element.port_num); + + if (record->event == IB_EVENT_SM_CHANGE || record->event == IB_EVENT_CLIENT_REREGISTER) { - ipoib_dbg(priv, "Port state change event\n"); - queue_work(ipoib_workqueue, &priv->flush_task); + queue_work(ipoib_workqueue, &priv->flush_light); + } else if (record->event == IB_EVENT_PORT_ERR || + record->event == IB_EVENT_PORT_ACTIVE || + record->event == IB_EVENT_LID_CHANGE) { + queue_work(ipoib_workqueue, &priv->flush_normal); } else if (record->event == IB_EVENT_PKEY_CHANGE) { - ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port); - queue_work(ipoib_workqueue, &priv->pkey_event_task); + queue_work(ipoib_workqueue, &priv->flush_heavy); } } -- cgit v1.2.3 From c03d4731b5b6de45b95a10bf1d510dde423d6757 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: IPoIB: Remove unused IPOIB_MCAST_STARTED code The IPOIB_MCAST_STARTED flag is not used at all since commit b3e2749b ("IPoIB: Don't drop multicast sends when they can be queued"), so remove it. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 8 -------- 2 files changed, 9 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bb19587c5eaf..66a897567ea9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -89,7 +89,6 @@ enum { IPOIB_FLAG_SUBINTERFACE = 5, IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, - IPOIB_MCAST_STARTED = 8, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, IPOIB_FLAG_CSUM = 11, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 4a6538b9301a..0b7d129161e1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -592,10 +592,6 @@ int ipoib_mcast_start_thread(struct net_device *dev) queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); - spin_lock_irq(&priv->lock); - set_bit(IPOIB_MCAST_STARTED, &priv->flags); - spin_unlock_irq(&priv->lock); - return 0; } @@ -605,10 +601,6 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush) ipoib_dbg_mcast(priv, "stopping multicast thread\n"); - spin_lock_irq(&priv->lock); - clear_bit(IPOIB_MCAST_STARTED, &priv->flags); - spin_unlock_irq(&priv->lock); - mutex_lock(&mcast_mutex); clear_bit(IPOIB_MCAST_RUN, &priv->flags); cancel_delayed_work(&priv->mcast_task); -- cgit v1.2.3 From 5892eff91ad60ba365ae7f75050ce464036c5396 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: IPoIB: Remove priv->mcast_mutex No need for a mutex around calls to ib_attach_mcast/ib_detach_mcast since these operations are synchronized at the HW driver layer. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 - drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 4 ---- 3 files changed, 6 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 66a897567ea9..b8753222c870 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -277,7 +277,6 @@ struct ipoib_dev_priv { unsigned long flags; - struct mutex mcast_mutex; struct mutex vlan_mutex; struct rb_root path_tree; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b3fd7e8333cf..8be9ea0436e6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1072,7 +1072,6 @@ static void ipoib_setup(struct net_device *dev) spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); - mutex_init(&priv->mcast_mutex); mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 96f9aa79cbbe..f50ebe0643ef 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -61,9 +61,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) } /* attach QP to multicast group */ - mutex_lock(&priv->mcast_mutex); ret = ib_attach_mcast(priv->qp, mgid, mlid); - mutex_unlock(&priv->mcast_mutex); if (ret) ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); @@ -77,9 +75,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - mutex_lock(&priv->mcast_mutex); ret = ib_detach_mcast(priv->qp, mgid, mlid); - mutex_unlock(&priv->mcast_mutex); if (ret) ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", 
ret); -- cgit v1.2.3 From d0de13622d5ac658efe7c51521dbdbe0752aa3dd Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: IPoIB: Only set Q_Key once: after joining broadcast group The current code will set the Q_Key for any join of a non-sendonly multicast group. The operation involves a modify QP operation, which is fairly heavyweight, and is only really required after the join of the broadcast group. Fix this by adding a parameter to ipoib_mcast_attach() to control when the Q_Key is set. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 4 +++- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 28 ++++++++++++++------------ 3 files changed, 19 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b8753222c870..7b46e2d7b3c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -485,7 +485,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, #endif int ipoib_mcast_attach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); + union ib_gid *mgid, int set_qkey); int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 0b7d129161e1..55ebd950bf23 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -186,6 +186,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah; int ret; + int set_qkey = 0; mcast->mcmember = *mcmember; @@ -200,6 +201,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); 
priv->tx_wr.wr.ud.remote_qkey = priv->qkey; + set_qkey = 1; } if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { @@ -212,7 +214,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, } ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid); + &mcast->mcmember.mgid, set_qkey); if (ret < 0) { ipoib_warn(priv, "couldn't attach QP to multicast group " IPOIB_GID_FMT "\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index f50ebe0643ef..ba7c8868e6f7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -33,18 +33,13 @@ #include "ipoib.h" -int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) +int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_qp_attr *qp_attr; + struct ib_qp_attr *qp_attr = NULL; int ret; u16 pkey_index; - ret = -ENOMEM; - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); - if (!qp_attr) - goto out; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = -ENXIO; @@ -52,12 +47,19 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); - /* set correct QKey for QP */ - qp_attr->qkey = priv->qkey; - ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); - if (ret) { - ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); - goto out; + if (set_qkey) { + ret = -ENOMEM; + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + goto out; + + /* set correct QKey for QP */ + qp_attr->qkey = priv->qkey; + ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); + if (ret) { + ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); + goto out; + } } /* attach QP to multicast group */ -- cgit v1.2.3 From 9eae554c171e086c89ab83da2a2d3c8bf958fcb5 
Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: IPoIB: Get rid of ipoib_mcast_detach() wrapper ipoib_mcast_detach() does nothing except call ib_detach_mcast(), so just use the core API in the one place that does a multicast group detach. add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-105 (-105) function old new delta ipoib_mcast_leave 357 319 -38 ipoib_mcast_detach 67 - -67 Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 -- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 6 +++--- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 12 ------------ 3 files changed, 3 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 7b46e2d7b3c2..a89b9fbe1ef4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -486,8 +486,6 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey); -int ipoib_mcast_detach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 55ebd950bf23..71add7a8d53c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -627,10 +627,10 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) IPOIB_GID_ARG(mcast->mcmember.mgid)); /* Remove ourselves from the multicast group */ - ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid); + ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid, + be16_to_cpu(mcast->mcmember.mlid)); if (ret) - ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); + 
ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); } return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index ba7c8868e6f7..68325119f740 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -72,18 +72,6 @@ out: return ret; } -int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - int ret; - - ret = ib_detach_mcast(priv->qp, mgid, mlid); - if (ret) - ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); - - return ret; -} - int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); -- cgit v1.2.3 From c8c2afe360b7366f586f6bece1109a72ea334876 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:51 -0700 Subject: IPoIB: Use rtnl lock/unlock when changing device flags Use of this lock is required to synchronize changes to the netdevice's data structs. Also move the call to ipoib_flush_paths() after the modification of the netdevice flags in set_mode(). 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 ++++++-- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 37bf67b2a26f..b4269139135b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1440,7 +1440,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); + rtnl_lock(); dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO); + rtnl_unlock(); priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); @@ -1449,14 +1451,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, if (!strcmp(buf, "datagram\n")) { clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); - dev->mtu = min(priv->mcast_mtu, dev->mtu); - ipoib_flush_paths(dev); + rtnl_lock(); if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) { dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; if (priv->hca_caps & IB_DEVICE_UD_TSO) dev->features |= NETIF_F_TSO; } + dev->mtu = min(priv->mcast_mtu, dev->mtu); + rtnl_unlock(); + ipoib_flush_paths(dev); return count; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 71add7a8d53c..be1ed38cdcfd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work) priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); - if (!ipoib_cm_admin_enabled(dev)) + if (!ipoib_cm_admin_enabled(dev)) { + rtnl_lock(); dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + rtnl_unlock(); + } ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 
-- cgit v1.2.3 From bd3606715effbf37df986548c43bbed0842b49d5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:51 -0700 Subject: IPoIB: Use dev_set_mtu() to change mtu When the driver sets the MTU of the net device outside of its change_mtu method, it should make use of dev_set_mtu() instead of directly setting the mtu field of struct netdevice. Otherwise functions registered to be called upon MTU change will not get called (this is done through call_netdevice_notifiers() in dev_set_mtu()). Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index b4269139135b..87f9f3ef3b2d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1458,7 +1458,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, if (priv->hca_caps & IB_DEVICE_UD_TSO) dev->features |= NETIF_F_TSO; } - dev->mtu = min(priv->mcast_mtu, dev->mtu); + dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); rtnl_unlock(); ipoib_flush_paths(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index be1ed38cdcfd..1fcc9a898d81 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -577,7 +577,7 @@ void ipoib_mcast_join_task(struct work_struct *work) if (!ipoib_cm_admin_enabled(dev)) { rtnl_lock(); - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu)); rtnl_unlock(); } -- cgit v1.2.3 From e112373fd6aa280bd2cbc0d5cc3809115325a1be Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: IPoIB/cm: Reduce connected mode TX object size Since 
IPoIB connected mode does not use NETIF_F_SG, we only have one DMA mapping per send, so we don't need a mapping[] array. Define a new struct with a single u64 mapping member and use it for the CM tx_ring. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 ++++++- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 12 ++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a89b9fbe1ef4..0281c8fecc90 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -157,6 +157,11 @@ struct ipoib_tx_buf { u64 mapping[MAX_SKB_FRAGS + 1]; }; +struct ipoib_cm_tx_buf { + struct sk_buff *skb; + u64 mapping; +}; + struct ib_cm_id; struct ipoib_cm_data { @@ -215,7 +220,7 @@ struct ipoib_cm_tx { struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; - struct ipoib_tx_buf *tx_ring; + struct ipoib_cm_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; unsigned long flags; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 87f9f3ef3b2d..0f2d3045061a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -703,7 +703,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; u64 addr; if (unlikely(skb->len > tx->mtu)) { @@ -734,7 +734,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ return; } - tx_req->mapping[0] = addr; + tx_req->mapping = addr; if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), addr, skb->len))) { @@ -759,7 +759,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) 
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx *tx = wc->qp->qp_context; unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; unsigned long flags; ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", @@ -773,7 +773,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &tx->tx_ring[wr_id]; - ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); /* FIXME: is this right? Shouldn't we only increment on success? */ ++dev->stats.tx_packets; @@ -1143,7 +1143,7 @@ err_tx: static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; unsigned long flags; unsigned long begin; @@ -1171,7 +1171,7 @@ timeout: while ((int) p->tx_tail - (int) p->tx_head < 0) { tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; - ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; -- cgit v1.2.3 From bc3a290b51aaefc6a6af2d6e6d52ed32387c416c Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: IPoIB: Double default RX/TX ring sizes Increase IPoIB ring sizes to twice their original sizes (RX: 128->256, TX: 64->128) to act as a shock absorber for high traffic peaks. With the current settings, we have seen cases where there are many calls to netif_stop_queue(), which causes degradation in throughput. Also, larger receive buffer sizes help IPoIB in CM mode to avoid experiencing RNR NAK conditions due to insufficient receive buffers at the SRQ. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 0281c8fecc90..b0ffc9abe8c0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -70,8 +70,8 @@ enum { IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, - IPOIB_RX_RING_SIZE = 128, - IPOIB_TX_RING_SIZE = 64, + IPOIB_RX_RING_SIZE = 256, + IPOIB_TX_RING_SIZE = 128, IPOIB_MAX_QUEUE_SIZE = 8192, IPOIB_MIN_QUEUE_SIZE = 2, IPOIB_CM_MAX_CONN_QP = 4096, -- cgit v1.2.3