RDMA/cxgb4: Don't change QP state outside EP lock
author Steve Wise <swise@opengridcomputing.com>
Tue, 10 May 2011 05:06:22 +0000 (22:06 -0700)
committer Roland Dreier <roland@purestorage.com>
Tue, 10 May 2011 05:06:22 +0000 (22:06 -0700)
Concurrent ingress CLOSE and ULP ABORT operations cause a crash due
to a race condition where the close path releases the EP lock and then
tries to move the QP state to CLOSED.  This must be done inside the EP
lock to avoid the race.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c

index 9d8dcfab2b38a7a2489cf7edc942c58c8c6c2c20..d235810e52df787423b9da33a892b3c010d9afb3 100644 (file)
@@ -1466,7 +1466,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
        struct c4iw_qp_attributes attrs;
        int disconnect = 1;
        int release = 0;
-       int closing = 0;
+       int abort = 0;
        struct tid_info *t = dev->rdev.lldi.tids;
        unsigned int tid = GET_TID(hdr);
 
@@ -1507,8 +1507,11 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
        case FPDU_MODE:
                start_ep_timer(ep);
                __state_set(&ep->com, CLOSING);
-               closing = 1;
+               attrs.next_state = C4IW_QP_STATE_CLOSING;
+               abort = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+                                      C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
                peer_close_upcall(ep);
+               disconnect = 1;
                break;
        case ABORTING:
                disconnect = 0;
@@ -1536,11 +1539,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
                BUG_ON(1);
        }
        mutex_unlock(&ep->com.mutex);
-       if (closing) {
-               attrs.next_state = C4IW_QP_STATE_CLOSING;
-               c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
-                              C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
-       }
        if (disconnect)
                c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
        if (release)
@@ -1710,14 +1708,14 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
        ep = lookup_tid(t, tid);
        BUG_ON(!ep);
 
-       if (ep->com.qp) {
+       if (ep && ep->com.qp) {
                printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
                       ep->com.qp->wq.sq.qid);
                attrs.next_state = C4IW_QP_STATE_TERMINATE;
                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
        } else
-               printk(KERN_WARNING MOD "TERM received tid %u no qp\n", tid);
+               printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
 
        return 0;
 }
index 9f6166f59268e8a597e7a8ea5a958184b217afc7..8e16eb2de91f95fc02606439bda39cf67a30d45b 100644 (file)
@@ -161,8 +161,8 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
                }
        } while (!wr_waitp->done);
        if (wr_waitp->ret)
-               printk(KERN_WARNING MOD "%s: FW reply %d tid %u qpid %u\n",
-                      pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
+               PDBG("%s: FW reply %d tid %u qpid %u\n",
+                    pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
        return wr_waitp->ret;
 }
 
index 70a5a3c646da478a4ae1b1088f2440f604bfed94..a1824a5f3d760b24db060e8637ac5b135552feb2 100644 (file)
@@ -1210,7 +1210,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                        if (ret) {
                                if (internal)
                                        c4iw_get_ep(&qhp->ep->com);
-                               disconnect = abort = 1;
                                goto err;
                        }
                        break;