RDMA/bnxt_re: Avoid Hard lockup during error CQE processing
Author:    Selvin Xavier <selvin.xavier@broadcom.com>  Tue, 6 Mar 2018 05:49:28 +0000 (21:49 -0800)
Committer: Jason Gunthorpe <jgg@mellanox.com>  Wed, 7 Mar 2018 03:08:39 +0000 (20:08 -0700)
The following hard lockup is hit due to a race condition in
error CQE processing:

[26146.879798] bnxt_en 0000:04:00.0: QPLIB: FP: CQ Processed Req
[26146.886346] bnxt_en 0000:04:00.0: QPLIB: wr_id[1251] = 0x0 with status 0xa
[26156.350935] NMI watchdog: Watchdog detected hard LOCKUP on cpu 4
[26156.357470] Modules linked in: nfsd auth_rpcgss nfs_acl lockd grace
[26156.447957] CPU: 4 PID: 3413 Comm: kworker/4:1H Kdump: loaded
[26156.457994] Hardware name: Dell Inc. PowerEdge R430/0CN7X8,
[26156.466390] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core]
[26156.472639] Call Trace:
[26156.475379]  <NMI>  [<ffffffff98d0d722>] dump_stack+0x19/0x1b
[26156.481833]  [<ffffffff9873f775>] watchdog_overflow_callback+0x135/0x140
[26156.489341]  [<ffffffff9877f237>] __perf_event_overflow+0x57/0x100
[26156.496256]  [<ffffffff98787c24>] perf_event_overflow+0x14/0x20
[26156.502887]  [<ffffffff9860a580>] intel_pmu_handle_irq+0x220/0x510
[26156.509813]  [<ffffffff98d16031>] perf_event_nmi_handler+0x31/0x50
[26156.516738]  [<ffffffff98d1790c>] nmi_handle.isra.0+0x8c/0x150
[26156.523273]  [<ffffffff98d17be8>] do_nmi+0x218/0x460
[26156.528834]  [<ffffffff98d16d79>] end_repeat_nmi+0x1e/0x7e
[26156.534980]  [<ffffffff987089c0>] ? native_queued_spin_lock_slowpath+0x1d0/0x200
[26156.543268]  [<ffffffff987089c0>] ? native_queued_spin_lock_slowpath+0x1d0/0x200
[26156.551556]  [<ffffffff987089c0>] ? native_queued_spin_lock_slowpath+0x1d0/0x200
[26156.559842]  <EOE>  [<ffffffff98d083e4>] queued_spin_lock_slowpath+0xb/0xf
[26156.567555]  [<ffffffff98d15690>] _raw_spin_lock+0x20/0x30
[26156.573696]  [<ffffffffc08381a1>] bnxt_qplib_lock_buddy_cq+0x31/0x40 [bnxt_re]
[26156.581789]  [<ffffffffc083bbaa>] bnxt_qplib_poll_cq+0x43a/0xf10 [bnxt_re]
[26156.589493]  [<ffffffffc083239b>] bnxt_re_poll_cq+0x9b/0x760 [bnxt_re]

The issue happens when RQ poll_cq, SQ poll_cq, or an async error event
tries to put an error QP on a flush list. Since the SQ and RQ of each
error QP are added to two different flush lists, the additions must be
protected by the locks of the corresponding CQs. A difference in the
order in which SQ poll_cq and RQ poll_cq acquire these locks can cause
a hard lockup.
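
To illustrate the required discipline, here is a minimal userspace
sketch (pthread mutexes standing in for the CQ spinlocks; this is
illustrative code, not part of the driver). Both pollers take the two
locks in one fixed order, so the ABBA cycle behind the lockup above
cannot form:

  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t scq_lock = PTHREAD_MUTEX_INITIALIZER;
  static pthread_mutex_t rcq_lock = PTHREAD_MUTEX_INITIALIZER;

  /* Every path that moves a QP to the flush lists honors one order:
   * SQ CQ lock first, then RQ CQ lock.  If either poller reversed it,
   * each could hold one lock while spinning on the other forever. */
  static void move_qp_to_flush_lists(const char *who)
  {
          pthread_mutex_lock(&scq_lock);
          pthread_mutex_lock(&rcq_lock);
          printf("%s: QP added to SQ and RQ flush lists\n", who);
          pthread_mutex_unlock(&rcq_lock);
          pthread_mutex_unlock(&scq_lock);
  }

  static void *sq_poll(void *arg) { move_qp_to_flush_lists("SQ poll_cq"); return NULL; }
  static void *rq_poll(void *arg) { move_qp_to_flush_lists("RQ poll_cq"); return NULL; }

  int main(void)
  {
          pthread_t a, b;

          pthread_create(&a, NULL, sq_poll, NULL);
          pthread_create(&b, NULL, rq_poll, NULL);
          pthread_join(a, NULL);
          pthread_join(b, NULL);
          return 0;
  }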

Revisit the locking strategy and remove the usage of qplib_cq.hwq.lock.
Instead, introduce qplib_cq.flush_lock to protect the addition/deletion
of QPs on the flush lists. Also, always acquire the flush_locks in a
fixed order (SQ CQ lock first, then RQ CQ lock) to avoid any potential
deadlock.

Other than the poll_cq context, the movement of a QP to/from the flush
lists can happen in the modify_qp context or on an async error event
from the HW. Synchronize these operations with poll_cq using the
bnxt_re verbs-layer CQ locks. To achieve this, add a callback from the
HW abstraction layer (qplib) into the bnxt_re ib_verbs layer for the
async error event. Also, remove the buddy CQ functions, as they are no
longer required.
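
The resulting two-level hierarchy, again as a hedged userspace sketch
(not driver code): the verbs-layer cq_lock is the outer lock that
excludes poll_cq while the control path moves the QP, and the new
qplib flush_lock nests inside it to cover only the flush-list update,
which is the part that two poll_cq instances of the same QP can still
race on:

  #include <pthread.h>

  static pthread_mutex_t cq_lock = PTHREAD_MUTEX_INITIALIZER;    /* verbs layer, outer */
  static pthread_mutex_t flush_lock = PTHREAD_MUTEX_INITIALIZER; /* qplib layer, inner */

  /* Control path (modify_qp or async error event): take the outer CQ
   * lock to shut out poll_cq, then the inner flush_lock around the
   * actual flush-list manipulation. */
  static void move_qp_to_error_state(void)
  {
          pthread_mutex_lock(&cq_lock);
          pthread_mutex_lock(&flush_lock);
          /* ... add the QP to the SQ and RQ flush lists ... */
          pthread_mutex_unlock(&flush_lock);
          pthread_mutex_unlock(&cq_lock);
  }

  int main(void)
  {
          move_qp_to_error_state();
          return 0;
  }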

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/bnxt_re/ib_verbs.h
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_fp.c
drivers/infiniband/hw/bnxt_re/qplib_fp.h
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 755f1ccd82bbff44cb75ca7afcd7392cf09dc354..0dd75f449872404701b5294db1a02737c85523fb 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -785,7 +785,7 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
        return 0;
 }
 
-static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
        __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
 {
        unsigned long flags;
@@ -799,8 +799,8 @@ static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
        return flags;
 }
 
-static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
-                              unsigned long flags)
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
+                       unsigned long flags)
        __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
 {
        if (qp->rcq != qp->scq)
@@ -1606,6 +1606,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
        int status;
        union ib_gid sgid;
        struct ib_gid_attr sgid_attr;
+       unsigned long flags;
        u8 nw_type;
 
        qp->qplib_qp.modify_flags = 0;
@@ -1634,14 +1635,18 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
                        dev_dbg(rdev_to_dev(rdev),
                                "Move QP = %p to flush list\n",
                                qp);
+                       flags = bnxt_re_lock_cqs(qp);
                        bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+                       bnxt_re_unlock_cqs(qp, flags);
                }
                if (!qp->sumem &&
                    qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
                        dev_dbg(rdev_to_dev(rdev),
                                "Move QP = %p out of flush list\n",
                                qp);
+                       flags = bnxt_re_lock_cqs(qp);
                        bnxt_qplib_clean_qp(&qp->qplib_qp);
+                       bnxt_re_unlock_cqs(qp, flags);
                }
        }
        if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index b88a48d43a9dddd49c210d0a16779f3d4d0369b9..e62b7c2c7da6a1953605fbe7d089a3d9b6f9a50a 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -222,4 +222,7 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
                                           struct ib_udata *udata);
 int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
 int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
 #endif /* __BNXT_RE_IB_VERBS_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 604c805ceaa7af7b4cb9dadced2b38c2543ef60f..f6e361750466f50acab01a4f1a397260e3a84472 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -730,6 +730,13 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
                                         struct bnxt_re_qp *qp)
 {
        struct ib_event event;
+       unsigned long flags;
+
+       if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+               flags = bnxt_re_lock_cqs(qp);
+               bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+               bnxt_re_unlock_cqs(qp, flags);
+       }
 
        memset(&event, 0, sizeof(event));
        if (qp->qplib_qp.srq) {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 3ea5b9624f6b79c3f54c6d069b4af7ab133d1f31..06b42c880fd4594b20bc50263dc4add1db17460d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -88,75 +88,35 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
        }
 }
 
-void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
-                                unsigned long *flags)
-       __acquires(&qp->scq->hwq.lock) __acquires(&qp->rcq->hwq.lock)
+static void bnxt_qplib_acquire_cq_flush_locks(struct bnxt_qplib_qp *qp,
+                                      unsigned long *flags)
+       __acquires(&qp->scq->flush_lock) __acquires(&qp->rcq->flush_lock)
 {
-       spin_lock_irqsave(&qp->scq->hwq.lock, *flags);
+       spin_lock_irqsave(&qp->scq->flush_lock, *flags);
        if (qp->scq == qp->rcq)
-               __acquire(&qp->rcq->hwq.lock);
+               __acquire(&qp->rcq->flush_lock);
        else
-               spin_lock(&qp->rcq->hwq.lock);
+               spin_lock(&qp->rcq->flush_lock);
 }
 
-void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
-                                unsigned long *flags)
-       __releases(&qp->scq->hwq.lock) __releases(&qp->rcq->hwq.lock)
+static void bnxt_qplib_release_cq_flush_locks(struct bnxt_qplib_qp *qp,
+                                      unsigned long *flags)
+       __releases(&qp->scq->flush_lock) __releases(&qp->rcq->flush_lock)
 {
        if (qp->scq == qp->rcq)
-               __release(&qp->rcq->hwq.lock);
+               __release(&qp->rcq->flush_lock);
        else
-               spin_unlock(&qp->rcq->hwq.lock);
-       spin_unlock_irqrestore(&qp->scq->hwq.lock, *flags);
-}
-
-static struct bnxt_qplib_cq *bnxt_qplib_find_buddy_cq(struct bnxt_qplib_qp *qp,
-                                                     struct bnxt_qplib_cq *cq)
-{
-       struct bnxt_qplib_cq *buddy_cq = NULL;
-
-       if (qp->scq == qp->rcq)
-               buddy_cq = NULL;
-       else if (qp->scq == cq)
-               buddy_cq = qp->rcq;
-       else
-               buddy_cq = qp->scq;
-       return buddy_cq;
-}
-
-static void bnxt_qplib_lock_buddy_cq(struct bnxt_qplib_qp *qp,
-                                    struct bnxt_qplib_cq *cq)
-       __acquires(&buddy_cq->hwq.lock)
-{
-       struct bnxt_qplib_cq *buddy_cq = NULL;
-
-       buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
-       if (!buddy_cq)
-               __acquire(&cq->hwq.lock);
-       else
-               spin_lock(&buddy_cq->hwq.lock);
-}
-
-static void bnxt_qplib_unlock_buddy_cq(struct bnxt_qplib_qp *qp,
-                                      struct bnxt_qplib_cq *cq)
-       __releases(&buddy_cq->hwq.lock)
-{
-       struct bnxt_qplib_cq *buddy_cq = NULL;
-
-       buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
-       if (!buddy_cq)
-               __release(&cq->hwq.lock);
-       else
-               spin_unlock(&buddy_cq->hwq.lock);
+               spin_unlock(&qp->rcq->flush_lock);
+       spin_unlock_irqrestore(&qp->scq->flush_lock, *flags);
 }
 
 void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
 {
        unsigned long flags;
 
-       bnxt_qplib_acquire_cq_locks(qp, &flags);
+       bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
        __bnxt_qplib_add_flush_qp(qp);
-       bnxt_qplib_release_cq_locks(qp, &flags);
+       bnxt_qplib_release_cq_flush_locks(qp, &flags);
 }
 
 static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
@@ -177,7 +137,7 @@ void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
 {
        unsigned long flags;
 
-       bnxt_qplib_acquire_cq_locks(qp, &flags);
+       bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
        __clean_cq(qp->scq, (u64)(unsigned long)qp);
        qp->sq.hwq.prod = 0;
        qp->sq.hwq.cons = 0;
@@ -186,7 +146,7 @@ void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
        qp->rq.hwq.cons = 0;
 
        __bnxt_qplib_del_flush_qp(qp);
-       bnxt_qplib_release_cq_locks(qp, &flags);
+       bnxt_qplib_release_cq_flush_locks(qp, &flags);
 }
 
 static void bnxt_qpn_cqn_sched_task(struct work_struct *work)
@@ -2107,9 +2067,6 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
        /* Must block new posting of SQ and RQ */
        qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
        bnxt_qplib_cancel_phantom_processing(qp);
-
-       /* Add qp to flush list of the CQ */
-       __bnxt_qplib_add_flush_qp(qp);
 }
 
 /* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
@@ -2285,9 +2242,9 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
                                sw_sq_cons, cqe->wr_id, cqe->status);
                        cqe++;
                        (*budget)--;
-                       bnxt_qplib_lock_buddy_cq(qp, cq);
                        bnxt_qplib_mark_qp_error(qp);
-                       bnxt_qplib_unlock_buddy_cq(qp, cq);
+                       /* Add qp to flush list of the CQ */
+                       bnxt_qplib_add_flush_qp(qp);
                } else {
                        if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
                                /* Before we complete, do WA 9060 */
@@ -2403,9 +2360,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
                if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
                        qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
                        /* Add qp to flush list of the CQ */
-                       bnxt_qplib_lock_buddy_cq(qp, cq);
-                       __bnxt_qplib_add_flush_qp(qp);
-                       bnxt_qplib_unlock_buddy_cq(qp, cq);
+                       bnxt_qplib_add_flush_qp(qp);
                }
        }
 
@@ -2489,9 +2444,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
                if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
                        qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
                        /* Add qp to flush list of the CQ */
-                       bnxt_qplib_lock_buddy_cq(qp, cq);
-                       __bnxt_qplib_add_flush_qp(qp);
-                       bnxt_qplib_unlock_buddy_cq(qp, cq);
+                       bnxt_qplib_add_flush_qp(qp);
                }
        }
 done:
@@ -2501,11 +2454,9 @@ done:
 bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
 {
        struct cq_base *hw_cqe, **hw_cqe_ptr;
-       unsigned long flags;
        u32 sw_cons, raw_cons;
        bool rc = true;
 
-       spin_lock_irqsave(&cq->hwq.lock, flags);
        raw_cons = cq->hwq.cons;
        sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
        hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
@@ -2513,7 +2464,6 @@ bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
 
         /* Check for Valid bit. If the CQE is valid, return false */
        rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
-       spin_unlock_irqrestore(&cq->hwq.lock, flags);
        return rc;
 }
 
@@ -2602,9 +2552,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
                if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
                        qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
                        /* Add qp to flush list of the CQ */
-                       bnxt_qplib_lock_buddy_cq(qp, cq);
-                       __bnxt_qplib_add_flush_qp(qp);
-                       bnxt_qplib_unlock_buddy_cq(qp, cq);
+                       bnxt_qplib_add_flush_qp(qp);
                }
        }
 
@@ -2719,9 +2667,7 @@ do_rq:
         */
 
        /* Add qp to flush list of the CQ */
-       bnxt_qplib_lock_buddy_cq(qp, cq);
-       __bnxt_qplib_add_flush_qp(qp);
-       bnxt_qplib_unlock_buddy_cq(qp, cq);
+       bnxt_qplib_add_flush_qp(qp);
 done:
        return rc;
 }
@@ -2750,7 +2696,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
        u32 budget = num_cqes;
        unsigned long flags;
 
-       spin_lock_irqsave(&cq->hwq.lock, flags);
+       spin_lock_irqsave(&cq->flush_lock, flags);
        list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
                dev_dbg(&cq->hwq.pdev->dev,
                        "QPLIB: FP: Flushing SQ QP= %p",
@@ -2764,7 +2710,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
                        qp);
                __flush_rq(&qp->rq, qp, &cqe, &budget);
        }
-       spin_unlock_irqrestore(&cq->hwq.lock, flags);
+       spin_unlock_irqrestore(&cq->flush_lock, flags);
 
        return num_cqes - budget;
 }
@@ -2773,11 +2719,9 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
                       int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
        struct cq_base *hw_cqe, **hw_cqe_ptr;
-       unsigned long flags;
        u32 sw_cons, raw_cons;
        int budget, rc = 0;
 
-       spin_lock_irqsave(&cq->hwq.lock, flags);
        raw_cons = cq->hwq.cons;
        budget = num_cqes;
 
@@ -2853,20 +2797,15 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
                bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ);
        }
 exit:
-       spin_unlock_irqrestore(&cq->hwq.lock, flags);
        return num_cqes - budget;
 }
 
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->hwq.lock, flags);
        if (arm_type)
                bnxt_qplib_arm_cq(cq, arm_type);
        /* Using cq->arm_state variable to track whether to issue cq handler */
        atomic_set(&cq->arm_state, 1);
-       spin_unlock_irqrestore(&cq->hwq.lock, flags);
 }
 
 void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index ca0a2ffa3509093a7953a80ea310f3f9314ace03..ade9f13c0fd1bb2e8cc04a8a1a85c9da1221db41 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -389,6 +389,18 @@ struct bnxt_qplib_cq {
        struct list_head                sqf_head, rqf_head;
        atomic_t                        arm_state;
        spinlock_t                      compl_lock; /* synch CQ handlers */
+/* Locking Notes:
+ * QP can move to error state from modify_qp, async error event or error
+ * CQE as part of poll_cq. When QP is moved to error state, it gets added
+ * to two flush lists, one each for SQ and RQ.
+ * Each flush list is protected by qplib_cq->flush_lock. Both scq and rcq
+ * flush_locks should be acquired when QP is moved to error. The control path
+ * operations(modify_qp and async error events) are synchronized with poll_cq
+ * using upper level CQ locks (bnxt_re_cq->cq_lock) of both SCQ and RCQ.
+ * The qplib_cq->flush_lock is required to synchronize two instances of poll_cq
+ * of the same QP while manipulating the flush list.
+ */
+       spinlock_t                      flush_lock; /* QP flush management */
 };
 
 #define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE        sizeof(struct xrrq_irrq)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 14d153d4013caf7581442b2bad04d38c13c0168c..80027a494730df22944bfbdc678ca36deadcec51 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -305,9 +305,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
                        err_event->res_err_state_reason);
                if (!qp)
                        break;
-               bnxt_qplib_acquire_cq_locks(qp, &flags);
                bnxt_qplib_mark_qp_error(qp);
-               bnxt_qplib_release_cq_locks(qp, &flags);
+               rcfw->aeq_handler(rcfw, qp_event, qp);
                break;
        default:
                /* Command Response */