diff options
author | Devesh Sharma <devesh.sharma@emulex.com> | 2014-02-04 11:56:54 +0530 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2014-04-03 08:29:34 -0700 |
commit | ea61762679cd4d409dcaa6f502f190f4c8156d09 (patch) | |
tree | 5827d101e41788a4ff93c2368eaa4d3ec5ae337a /drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | |
parent | bc1b04ab34a1485339571242cb0fbad823835685 (diff) | |
download | linux-ea61762679cd4d409dcaa6f502f190f4c8156d09.tar.gz linux-ea61762679cd4d409dcaa6f502f190f4c8156d09.tar.bz2 linux-ea61762679cd4d409dcaa6f502f190f4c8156d09.zip |
RDMA/ocrdma: EQ full catastrophe avoidance
Stale entries in the CQ being destroyed causes hardware to generate
EQEs indefinitely for a given CQ. Thus causing uncontrolled execution
of irq_handler. This patch fixes this using following sementics:
* irq_handler will ring EQ doorbell atleast once and implement budgeting scheme.
* cq_destroy will count number of valid entires during destroy and ring
cq-db so that hardware does not generate uncontrolled EQE.
* cq_destroy will synchronize with last running irq_handler instance.
* arm_cq will always defer arming CQ till poll_cq, except for the first arm_cq call.
* poll_cq will always ring cq-db with arm=SET if arm_cq was called prior to enter poll_cq.
* poll_cq will always ring cq-db with arm=UNSET if arm_cq was not called prior to enter poll_cq.
Signed-off-by: Devesh Sharma <devesh.sharma@emulex.com>
Signed-off-by: Selvin Xavier <selvin.xavier@emulex.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband/hw/ocrdma/ocrdma_verbs.c')
-rw-r--r-- | drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 81 |
1 files changed, 61 insertions, 20 deletions
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index f1108ebae83a..7d59ed340991 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -910,6 +910,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, spin_lock_init(&cq->comp_handler_lock); INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); + cq->first_arm = true; if (ib_ctx) { uctx = get_ocrdma_ucontext(ib_ctx); @@ -927,9 +928,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, goto ctx_err; } cq->phase = OCRDMA_CQE_VALID; - cq->arm_needed = true; dev->cq_tbl[cq->id] = cq; - return &cq->ibcq; ctx_err: @@ -952,15 +951,52 @@ int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt, return status; } +static void ocrdma_flush_cq(struct ocrdma_cq *cq) +{ + int cqe_cnt; + int valid_count = 0; + unsigned long flags; + + struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device); + struct ocrdma_cqe *cqe = NULL; + + cqe = cq->va; + cqe_cnt = cq->cqe_cnt; + + /* Last irq might have scheduled a polling thread + * sync-up with it before hard flushing. + */ + spin_lock_irqsave(&cq->cq_lock, flags); + while (cqe_cnt) { + if (is_cqe_valid(cq, cqe)) + valid_count++; + cqe++; + cqe_cnt--; + } + ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count); + spin_unlock_irqrestore(&cq->cq_lock, flags); +} + int ocrdma_destroy_cq(struct ib_cq *ibcq) { int status; struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_eq *eq = NULL; struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); int pdid = 0; + u32 irq, indx; - status = ocrdma_mbx_destroy_cq(dev, cq); + dev->cq_tbl[cq->id] = NULL; + indx = ocrdma_get_eq_table_index(dev, cq->eqn); + if (indx == -EINVAL) + BUG(); + eq = &dev->eq_tbl[indx]; + irq = ocrdma_get_irq(dev, eq); + synchronize_irq(irq); + ocrdma_flush_cq(cq); + + status = ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { pdid = cq->ucontext->cntxt_pd->id; ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, @@ -969,7 +1005,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } - dev->cq_tbl[cq->id] = NULL; kfree(cq); return status; @@ -2705,10 +2740,18 @@ expand_cqe: } stop_cqe: cq->getp = cur_getp; - if (polled_hw_cqes || expand || stop) { - ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited, + if (cq->deferred_arm) { + ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol, + polled_hw_cqes); + cq->deferred_arm = false; + cq->deferred_sol = false; + } else { + /* We need to pop the CQE. No need to arm */ + ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol, polled_hw_cqes); + cq->deferred_sol = false; } + return i; } @@ -2780,30 +2823,28 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); u16 cq_id; - u16 cur_getp; - struct ocrdma_cqe *cqe; unsigned long flags; + bool arm_needed = false, sol_needed = false; cq_id = cq->id; spin_lock_irqsave(&cq->cq_lock, flags); if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED) - cq->armed = true; + arm_needed = true; if (cq_flags & IB_CQ_SOLICITED) - cq->solicited = true; - - cur_getp = cq->getp; - cqe = cq->va + cur_getp; + sol_needed = true; - /* check whether any valid cqe exist or not, if not then safe to - * arm. If cqe is not yet consumed, then let it get consumed and then - * we arm it to avoid false interrupts. - */ - if (!is_cqe_valid(cq, cqe) || cq->arm_needed) { - cq->arm_needed = false; - ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0); + if (cq->first_arm) { + ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0); + cq->first_arm = false; + goto skip_defer; } + cq->deferred_arm = true; + +skip_defer: + cq->deferred_sol = sol_needed; spin_unlock_irqrestore(&cq->cq_lock, flags); + return 0; } |