rdma/cxgb4: Add support for kernel mode SRQs
author Raju Rangoju <rajur@chelsio.com>
Wed, 25 Jul 2018 15:52:14 +0000 (21:22 +0530)
committer Jason Gunthorpe <jgg@mellanox.com>
Thu, 26 Jul 2018 03:08:04 +0000 (21:08 -0600)
This patch implements the SRQ-specific verbs (create/destroy/modify and
post_srq_recv), and adds SRQ-specific structures and defines to t4.h
and the uapi.

It also updates the CQ poll logic to deal with completions that are
associated with SRQs.

This patch also handles kernel mode SRQ_LIMIT events as well as flushed
SRQ buffers.

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/resource.c
drivers/infiniband/hw/cxgb4/t4.h

index 77243f7e17d5797f68d724237d63ac8e6eb44d78..54f7fbef78807a8ceaa03a862fb3857fc2f2b228 100644 (file)
@@ -1853,10 +1853,34 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
        return 0;
 }
 
+/*
+ * complete_cached_srq_buffers - complete an SRQ buffer cached by a TCB
+ * @ep: endpoint the abort CPL was received for
+ * @srqidx_status: big-endian srqidx_status word taken from the abort CPL
+ *
+ * Extracts the SRQ index from @srqidx_status. Nothing is done when that
+ * index is zero or when the adapter's chip version is not above CHELSIO_T5.
+ */
+static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx_status)
+{
+       enum chip_type adapter_type;
+       u32 srqidx;
+
+       adapter_type = ep->com.dev->rdev.lldi.adapter_type;
+       srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
+
+       /*
+        * If this TCB had a srq buffer cached, then we must complete
+        * it. For user mode, that means saving the srqidx in the
+        * user/kernel status page for this qp.  For kernel mode, just
+        * synthesize the CQE now.
+        */
+       if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
+               /* A uobject on the ib_qp marks a user-mode QP. */
+               if (ep->com.qp->ibqp.uobject)
+                       t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
+               else
+                       c4iw_flush_srqidx(ep->com.qp, srqidx);
+       }
+}
+
 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 {
        struct c4iw_ep *ep;
-       struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
+       struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
        int release = 0;
        unsigned int tid = GET_TID(rpl);
 
@@ -1865,6 +1889,9 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
                pr_warn("Abort rpl to freed endpoint\n");
                return 0;
        }
+
+       complete_cached_srq_buffers(ep, rpl->srqidx_status);
+
        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
        mutex_lock(&ep->com.mutex);
        switch (ep->com.state) {
@@ -2719,28 +2746,35 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
 
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
-       struct cpl_abort_req_rss *req = cplhdr(skb);
+       struct cpl_abort_req_rss6 *req = cplhdr(skb);
        struct c4iw_ep *ep;
        struct sk_buff *rpl_skb;
        struct c4iw_qp_attributes attrs;
        int ret;
        int release = 0;
        unsigned int tid = GET_TID(req);
+       u8 status;
+
        u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
        ep = get_ep_from_tid(dev, tid);
        if (!ep)
                return 0;
 
-       if (cxgb_is_neg_adv(req->status)) {
+       status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
+
+       if (cxgb_is_neg_adv(status)) {
                pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
-                        ep->hwtid, req->status, neg_adv_str(req->status));
+                        ep->hwtid, status, neg_adv_str(status));
                ep->stats.abort_neg_adv++;
                mutex_lock(&dev->rdev.stats.lock);
                dev->rdev.stats.neg_adv++;
                mutex_unlock(&dev->rdev.stats.lock);
                goto deref_ep;
        }
+
+       complete_cached_srq_buffers(ep, req->srqidx_status);
+
        pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
                 ep->com.state);
        set_bit(PEER_ABORT, &ep->com.history);
index a055f9f08e76b10e8bc214b49808c56c40bdd8b1..d266c8d0bf94d98d422733db80e14c5097592ac8 100644 (file)
@@ -182,7 +182,7 @@ err1:
        return ret;
 }
 
-static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
+static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx)
 {
        struct t4_cqe cqe;
 
@@ -195,6 +195,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
                                 CQE_SWCQE_V(1) |
                                 CQE_QPID_V(wq->sq.qid));
        cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
+       if (srqidx)
+               cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx);
        cq->sw_queue[cq->sw_pidx] = cqe;
        t4_swcq_produce(cq);
 }
@@ -207,7 +209,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
        pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
                 wq, cq, wq->rq.in_use, count);
        while (in_use--) {
-               insert_recv_cqe(wq, cq);
+               insert_recv_cqe(wq, cq, 0);
                flushed++;
        }
        return flushed;
@@ -458,6 +460,72 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
        pr_debug("cq %p count %d\n", cq, *count);
 }
 
+/*
+ * Drain the SRQ's pending-WR list into the SRQ.
+ *
+ * Each pending WR is recorded in sw_rq[] at the current pidx, copied into
+ * the queue, and then accounted for on both the pending list and the SRQ.
+ * If anything was posted, t4_ring_srq_db() is called once for the whole
+ * batch (passing the last WR drained) and host_wq_pidx in the status entry
+ * at queue[size] is refreshed.
+ */
+static void post_pending_srq_wrs(struct t4_srq *srq)
+{
+       struct t4_srq_pending_wr *pwr;
+       u16 slots = 0;
+
+       while (srq->pending_in_use) {
+               pwr = &srq->pending_wrs[srq->pending_cidx];
+
+               srq->sw_rq[srq->pidx].valid = 1;
+               srq->sw_rq[srq->pidx].wr_id = pwr->wr_id;
+
+               pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+                        __func__,
+                        srq->cidx, srq->pidx, srq->wq_pidx,
+                        srq->in_use, srq->size,
+                        (unsigned long long)pwr->wr_id);
+
+               c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16);
+               t4_srq_consume_pending_wr(srq);
+               t4_srq_produce(srq, pwr->len16);
+
+               /* len16 counts 16-byte units; convert to EQ entries. */
+               slots += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE);
+       }
+
+       /* Nothing was posted, so there is nothing to announce. */
+       if (!slots)
+               return;
+
+       t4_ring_srq_db(srq, slots, pwr->len16, &pwr->wqe);
+       srq->queue[srq->size].status.host_wq_pidx = srq->wq_pidx;
+}
+
+/*
+ * Retire the SRQ entry referenced by a receive CQE and return its wr_id.
+ *
+ * The entry index is the CQE's absolute RQE index relative to
+ * srq->rqt_abs_idx. The entry is marked invalid in sw_rq[]. If it is not
+ * the entry at cidx, it is only counted as out of order. Otherwise it is
+ * consumed, any already-invalid entries now at cidx are consumed as
+ * out-of-order ones, and pending WRs are posted once no out-of-order
+ * entries remain.
+ */
+static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq)
+{
+       int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx;
+       u64 wr_id = srq->sw_rq[rel_idx].wr_id;
+
+       srq->sw_rq[rel_idx].valid = 0;
+
+       /* Completion arrived ahead of cidx: just account for it. */
+       if (rel_idx != srq->cidx) {
+               pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+                        __func__, rel_idx, srq->cidx,
+                        srq->pidx, srq->wq_pidx,
+                        srq->in_use, srq->size,
+                        srq->ooo_count,
+                        (unsigned long long)wr_id);
+               t4_srq_produce_ooo(srq);
+               return wr_id;
+       }
+
+       pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+                __func__, rel_idx, srq->cidx, srq->pidx,
+                srq->wq_pidx, srq->in_use, srq->size,
+                (unsigned long long)wr_id);
+       t4_srq_consume(srq);
+
+       /* Sweep up entries at cidx that were completed out of order. */
+       while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) {
+               pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+                        __func__, srq->cidx, srq->pidx,
+                        srq->wq_pidx, srq->in_use,
+                        srq->size, srq->ooo_count,
+                        (unsigned long long)
+                        srq->sw_rq[srq->cidx].wr_id);
+               t4_srq_consume_ooo(srq);
+       }
+
+       if (!srq->ooo_count && srq->pending_in_use)
+               post_pending_srq_wrs(srq);
+
+       return wr_id;
+}
+
 /*
  * poll_cq
  *
@@ -475,7 +543,8 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
  *    -EOVERFLOW    CQ overflow detected.
  */
 static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
-                  u8 *cqe_flushed, u64 *cookie, u32 *credit)
+                  u8 *cqe_flushed, u64 *cookie, u32 *credit,
+                  struct t4_srq *srq)
 {
        int ret = 0;
        struct t4_cqe *hw_cqe, read_cqe;
@@ -540,7 +609,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
                 */
                if (CQE_TYPE(hw_cqe) == 1) {
                        if (CQE_STATUS(hw_cqe))
-                               t4_set_wq_in_error(wq);
+                               t4_set_wq_in_error(wq, 0);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }
@@ -551,7 +620,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
                 */
                if (CQE_WRID_STAG(hw_cqe) == 1) {
                        if (CQE_STATUS(hw_cqe))
-                               t4_set_wq_in_error(wq);
+                               t4_set_wq_in_error(wq, 0);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }
@@ -576,7 +645,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 
        if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
                *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
-               t4_set_wq_in_error(wq);
+               t4_set_wq_in_error(wq, 0);
        }
 
        /*
@@ -590,15 +659,9 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
                 * then we complete this with T4_ERR_MSN and mark the wq in
                 * error.
                 */
-
-               if (t4_rq_empty(wq)) {
-                       t4_set_wq_in_error(wq);
-                       ret = -EAGAIN;
-                       goto skip_cqe;
-               }
                if (unlikely(!CQE_STATUS(hw_cqe) &&
                             CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
-                       t4_set_wq_in_error(wq);
+                       t4_set_wq_in_error(wq, 0);
                        hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
                }
                goto proc_cqe;
@@ -657,11 +720,16 @@ proc_cqe:
                        c4iw_log_wr_stats(wq, hw_cqe);
                t4_sq_consume(wq);
        } else {
-               pr_debug("completing rq idx %u\n", wq->rq.cidx);
-               *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
-               if (c4iw_wr_log)
-                       c4iw_log_wr_stats(wq, hw_cqe);
-               t4_rq_consume(wq);
+               if (!srq) {
+                       pr_debug("completing rq idx %u\n", wq->rq.cidx);
+                       *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
+                       if (c4iw_wr_log)
+                               c4iw_log_wr_stats(wq, hw_cqe);
+                       t4_rq_consume(wq);
+               } else {
+                       *cookie = reap_srq_cqe(hw_cqe, srq);
+               }
+               wq->rq.msn++;
                goto skip_cqe;
        }
 
@@ -685,7 +753,7 @@ skip_cqe:
 }
 
 static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
-                             struct ib_wc *wc)
+                             struct ib_wc *wc, struct c4iw_srq *srq)
 {
        struct t4_cqe uninitialized_var(cqe);
        struct t4_wq *wq = qhp ? &qhp->wq : NULL;
@@ -694,7 +762,8 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
        u64 cookie = 0;
        int ret;
 
-       ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
+       ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit,
+                     srq ? &srq->wq : NULL);
        if (ret)
                goto out;
 
@@ -703,6 +772,13 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
        wc->vendor_err = CQE_STATUS(&cqe);
        wc->wc_flags = 0;
 
+       /*
+        * Simulate a SRQ_LIMIT_REACHED HW notification if required.
+        */
+       if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed &&
+           srq->wq.in_use < srq->srq_limit)
+               c4iw_dispatch_srq_limit_reached_event(srq);
+
        pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
                 CQE_QPID(&cqe),
                 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
@@ -828,6 +904,7 @@ out:
  */
 static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 {
+       struct c4iw_srq *srq = NULL;
        struct c4iw_qp *qhp = NULL;
        struct t4_cqe *rd_cqe;
        int ret;
@@ -840,10 +917,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
        qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
        if (qhp) {
                spin_lock(&qhp->lock);
-               ret = __c4iw_poll_cq_one(chp, qhp, wc);
+               srq = qhp->srq;
+               if (srq)
+                       spin_lock(&srq->lock);
+               ret = __c4iw_poll_cq_one(chp, qhp, wc, srq);
                spin_unlock(&qhp->lock);
+               if (srq)
+                       spin_unlock(&srq->lock);
        } else {
-               ret = __c4iw_poll_cq_one(chp, NULL, wc);
+               ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL);
        }
        return ret;
 }
@@ -1078,3 +1160,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
        spin_unlock_irqrestore(&chp->lock, flag);
        return ret;
 }
+
+/*
+ * c4iw_flush_srqidx - insert a receive CQE for one SRQ index
+ * @qhp: QP whose receive CQ gets the CQE
+ * @srqidx: SRQ index handed to insert_recv_cqe() for the new CQE
+ *
+ * Runs with the receive CQ lock (interrupts saved/disabled) and the QP
+ * lock held around the insertion.
+ */
+void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx)
+{
+       struct c4iw_cq *recv_chp = to_c4iw_cq(qhp->ibqp.recv_cq);
+       unsigned long irq_flags;
+
+       /* Locking hierarchy: the CQ lock is taken first, then the QP lock. */
+       spin_lock_irqsave(&recv_chp->lock, irq_flags);
+       spin_lock(&qhp->lock);
+
+       /* Create a SRQ RECV CQE for srqidx. */
+       insert_recv_cqe(&qhp->wq, &recv_chp->cq, srqidx);
+
+       spin_unlock(&qhp->lock);
+       spin_unlock_irqrestore(&recv_chp->lock, irq_flags);
+}
index a3c3418afd737bae7a97126e23b77539024b8dd8..5ef082bfa95a0bd68212de66dc021499c601f4df 100644 (file)
@@ -275,10 +275,11 @@ static int dump_qp(int id, void *p, void *data)
 
                        set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
                        cc = snprintf(qpd->buf + qpd->pos, space,
-                                     "rc qp sq id %u rq id %u state %u "
+                                     "rc qp sq id %u %s id %u state %u "
                                      "onchip %u ep tid %u state %u "
                                      "%pI4:%u/%u->%pI4:%u/%u\n",
-                                     qp->wq.sq.qid, qp->wq.rq.qid,
+                                     qp->wq.sq.qid, qp->srq ? "srq" : "rq",
+                                     qp->srq ? qp->srq->idx : qp->wq.rq.qid,
                                      (int)qp->attr.state,
                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
                                      ep->hwtid, (int)ep->com.state,
@@ -480,6 +481,9 @@ static int stats_show(struct seq_file *seq, void *v)
        seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
                        dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
                        dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
+       seq_printf(seq, "     SRQS: %10llu %10llu %10llu %10llu\n",
+                  dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur,
+                       dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail);
        seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
                        dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
                        dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
@@ -530,6 +534,8 @@ static ssize_t stats_clear(struct file *file, const char __user *buf,
        dev->rdev.stats.pbl.fail = 0;
        dev->rdev.stats.rqt.max = 0;
        dev->rdev.stats.rqt.fail = 0;
+       /* Reset the SRQ table counters that stats_show() prints as "SRQS". */
+       dev->rdev.stats.srqt.max = 0;
+       dev->rdev.stats.srqt.fail = 0;
        dev->rdev.stats.ocqp.max = 0;
        dev->rdev.stats.ocqp.fail = 0;
        dev->rdev.stats.db_full = 0;
@@ -802,7 +808,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 
        rdev->qpmask = rdev->lldi.udb_density - 1;
        rdev->cqmask = rdev->lldi.ucq_density - 1;
-       pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n",
+       pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
                 pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
                 rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
                 rdev->lldi.vr->pbl.start,
@@ -811,7 +817,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
                 rdev->lldi.vr->qp.start,
                 rdev->lldi.vr->qp.size,
                 rdev->lldi.vr->cq.start,
-                rdev->lldi.vr->cq.size);
+                rdev->lldi.vr->cq.size,
+                rdev->lldi.vr->srq.size);
        pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
                 &rdev->lldi.pdev->resource[2],
                 rdev->lldi.db_reg, rdev->lldi.gts_reg,
@@ -824,10 +831,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
        rdev->stats.stag.total = rdev->lldi.vr->stag.size;
        rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
        rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
+       rdev->stats.srqt.total = rdev->lldi.vr->srq.size;
        rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
        rdev->stats.qid.total = rdev->lldi.vr->qp.size;
 
-       err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
+       err = c4iw_init_resource(rdev, c4iw_num_stags(rdev),
+                                T4_MAX_NUM_PD, rdev->lldi.vr->srq.size);
        if (err) {
                pr_err("error %d initializing resources\n", err);
                return err;
index 1d567aaf88e3135c9d9e5b0f38c8cc43525819e7..047106cb0393711b97ae1db65589b25b148699ff 100644 (file)
@@ -1013,7 +1013,8 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
                   struct c4iw_dev_ucontext *uctx);
 u32 c4iw_get_resource(struct c4iw_id_table *id_table);
 void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid);
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+                      u32 nr_pdid, u32 nr_srqt);
 int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
 int c4iw_pblpool_create(struct c4iw_rdev *rdev);
 int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
index 61b8bdb9423d9a7575092f5546506bc2e255f41c..c314d8fdfbbabfca32e6bbdfead0c97535e58a3d 100644 (file)
@@ -342,9 +342,12 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
        props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
        props->max_mr_size = T4_MAX_MR_SIZE;
        props->max_qp = dev->rdev.lldi.vr->qp.size / 2;
+       props->max_srq = dev->rdev.lldi.vr->srq.size;
        props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth;
+       props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth;
        props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE);
        props->max_recv_sge = T4_MAX_RECV_SGE;
+       props->max_srq_sge = T4_MAX_RECV_SGE;
        props->max_sge_rd = 1;
        props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter;
        props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp,
@@ -593,7 +596,10 @@ void c4iw_register_device(struct work_struct *work)
            (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
            (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
            (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-           (1ull << IB_USER_VERBS_CMD_POST_RECV);
+           (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+           (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+           (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+           (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
        dev->ibdev.node_type = RDMA_NODE_RNIC;
        BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
        memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
@@ -615,6 +621,9 @@ void c4iw_register_device(struct work_struct *work)
        dev->ibdev.modify_qp = c4iw_ib_modify_qp;
        dev->ibdev.query_qp = c4iw_ib_query_qp;
        dev->ibdev.destroy_qp = c4iw_destroy_qp;
+       dev->ibdev.create_srq = c4iw_create_srq;
+       dev->ibdev.modify_srq = c4iw_modify_srq;
+       dev->ibdev.destroy_srq = c4iw_destroy_srq;
        dev->ibdev.create_cq = c4iw_create_cq;
        dev->ibdev.destroy_cq = c4iw_destroy_cq;
        dev->ibdev.resize_cq = c4iw_resize_cq;
@@ -632,6 +641,7 @@ void c4iw_register_device(struct work_struct *work)
        dev->ibdev.req_notify_cq = c4iw_arm_cq;
        dev->ibdev.post_send = c4iw_post_send;
        dev->ibdev.post_recv = c4iw_post_receive;
+       dev->ibdev.post_srq_recv = c4iw_post_srq_recv;
        dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
        dev->ibdev.get_hw_stats = c4iw_get_mib;
        dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
index aef53305f1c37f2dba46704866d18f799cc7c323..08dc555942aff4eaa5d47b8ff5eb51970cec28d0 100644 (file)
@@ -147,21 +147,24 @@ static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
 }
 
 static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
-                     struct c4iw_dev_ucontext *uctx)
+                     struct c4iw_dev_ucontext *uctx, int has_rq)
 {
        /*
         * uP clears EQ contexts when the connection exits rdma mode,
         * so no need to post a RESET WR for these EQs.
         */
-       dma_free_coherent(&(rdev->lldi.pdev->dev),
-                         wq->rq.memsize, wq->rq.queue,
-                         dma_unmap_addr(&wq->rq, mapping));
        dealloc_sq(rdev, &wq->sq);
-       c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
-       kfree(wq->rq.sw_rq);
        kfree(wq->sq.sw_sq);
-       c4iw_put_qpid(rdev, wq->rq.qid, uctx);
        c4iw_put_qpid(rdev, wq->sq.qid, uctx);
+
+       if (has_rq) {
+               dma_free_coherent(&rdev->lldi.pdev->dev,
+                                 wq->rq.memsize, wq->rq.queue,
+                                 dma_unmap_addr(&wq->rq, mapping));
+               c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+               kfree(wq->rq.sw_rq);
+               c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+       }
        return 0;
 }
 
@@ -195,7 +198,8 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
 static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
                     struct t4_cq *rcq, struct t4_cq *scq,
                     struct c4iw_dev_ucontext *uctx,
-                    struct c4iw_wr_wait *wr_waitp)
+                    struct c4iw_wr_wait *wr_waitp,
+                    int need_rq)
 {
        int user = (uctx != &rdev->uctx);
        struct fw_ri_res_wr *res_wr;
@@ -209,10 +213,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
        if (!wq->sq.qid)
                return -ENOMEM;
 
-       wq->rq.qid = c4iw_get_qpid(rdev, uctx);
-       if (!wq->rq.qid) {
-               ret = -ENOMEM;
-               goto free_sq_qid;
+       if (need_rq) {
+               wq->rq.qid = c4iw_get_qpid(rdev, uctx);
+               if (!wq->rq.qid) {
+                       ret = -ENOMEM;
+                       goto free_sq_qid;
+               }
        }
 
        if (!user) {
@@ -220,25 +226,31 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
                                       GFP_KERNEL);
                if (!wq->sq.sw_sq) {
                        ret = -ENOMEM;
-                       goto free_rq_qid;
+                       goto free_rq_qid;//FIXME
                }
 
-               wq->rq.sw_rq = kcalloc(wq->rq.size, sizeof(*wq->rq.sw_rq),
-                                      GFP_KERNEL);
-               if (!wq->rq.sw_rq) {
-                       ret = -ENOMEM;
-                       goto free_sw_sq;
+               if (need_rq) {
+                       wq->rq.sw_rq = kcalloc(wq->rq.size,
+                                              sizeof(*wq->rq.sw_rq),
+                                              GFP_KERNEL);
+                       if (!wq->rq.sw_rq) {
+                               ret = -ENOMEM;
+                               goto free_sw_sq;
+                       }
                }
        }
 
-       /*
-        * RQT must be a power of 2 and at least 16 deep.
-        */
-       wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
-       wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
-       if (!wq->rq.rqt_hwaddr) {
-               ret = -ENOMEM;
-               goto free_sw_rq;
+       if (need_rq) {
+               /*
+                * RQT must be a power of 2 and at least 16 deep.
+                */
+               wq->rq.rqt_size =
+                       roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
+               wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
+               if (!wq->rq.rqt_hwaddr) {
+                       ret = -ENOMEM;
+                       goto free_sw_rq;
+               }
        }
 
        ret = alloc_sq(rdev, &wq->sq, user);
@@ -247,34 +259,39 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
        memset(wq->sq.queue, 0, wq->sq.memsize);
        dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
 
-       wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev),
-                                         wq->rq.memsize, &(wq->rq.dma_addr),
-                                         GFP_KERNEL);
-       if (!wq->rq.queue) {
-               ret = -ENOMEM;
-               goto free_sq;
+       if (need_rq) {
+               wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+                                                 wq->rq.memsize,
+                                                 &wq->rq.dma_addr,
+                                                 GFP_KERNEL);
+               if (!wq->rq.queue) {
+                       ret = -ENOMEM;
+                       goto free_sq;
+               }
+               pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
+                        wq->sq.queue,
+                        (unsigned long long)virt_to_phys(wq->sq.queue),
+                        wq->rq.queue,
+                        (unsigned long long)virt_to_phys(wq->rq.queue));
+               memset(wq->rq.queue, 0, wq->rq.memsize);
+               dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
        }
-       pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
-                wq->sq.queue,
-                (unsigned long long)virt_to_phys(wq->sq.queue),
-                wq->rq.queue,
-                (unsigned long long)virt_to_phys(wq->rq.queue));
-       memset(wq->rq.queue, 0, wq->rq.memsize);
-       dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
 
        wq->db = rdev->lldi.db_reg;
 
        wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
                                         &wq->sq.bar2_qid,
                                         user ? &wq->sq.bar2_pa : NULL);
-       wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS,
-                                        &wq->rq.bar2_qid,
-                                        user ? &wq->rq.bar2_pa : NULL);
+       if (need_rq)
+               wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
+                                                T4_BAR2_QTYPE_EGRESS,
+                                                &wq->rq.bar2_qid,
+                                                user ? &wq->rq.bar2_pa : NULL);
 
        /*
         * User mode must have bar2 access.
         */
-       if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
+       if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
                pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
                        pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
                goto free_dma;
@@ -285,7 +302,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
        /* build fw_ri_res_wr */
        wr_len = sizeof *res_wr + 2 * sizeof *res;
-
+       if (need_rq)
+               wr_len += sizeof(*res);
        skb = alloc_skb(wr_len, GFP_KERNEL);
        if (!skb) {
                ret = -ENOMEM;
@@ -296,7 +314,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
        res_wr = __skb_put_zero(skb, wr_len);
        res_wr->op_nres = cpu_to_be32(
                        FW_WR_OP_V(FW_RI_RES_WR) |
-                       FW_RI_RES_WR_NRES_V(2) |
+                       FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) |
                        FW_WR_COMPL_F);
        res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
        res_wr->cookie = (uintptr_t)wr_waitp;
@@ -327,30 +345,36 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
                FW_RI_RES_WR_EQSIZE_V(eqsize));
        res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
        res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
-       res++;
-       res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
-       res->u.sqrq.op = FW_RI_RES_OP_WRITE;
 
-       /*
-        * eqsize is the number of 64B entries plus the status page size.
-        */
-       eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
-               rdev->hw_queue.t4_eq_status_entries;
-       res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
-               FW_RI_RES_WR_HOSTFCMODE_V(0) |  /* no host cidx updates */
-               FW_RI_RES_WR_CPRIO_V(0) |       /* don't keep in chip cache */
-               FW_RI_RES_WR_PCIECHN_V(0) |     /* set by uP at ri_init time */
-               FW_RI_RES_WR_IQID_V(rcq->cqid));
-       res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
-               FW_RI_RES_WR_DCAEN_V(0) |
-               FW_RI_RES_WR_DCACPU_V(0) |
-               FW_RI_RES_WR_FBMIN_V(2) |
-               FW_RI_RES_WR_FBMAX_V(3) |
-               FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
-               FW_RI_RES_WR_CIDXFTHRESH_V(0) |
-               FW_RI_RES_WR_EQSIZE_V(eqsize));
-       res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
-       res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+       if (need_rq) {
+               res++;
+               res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
+               res->u.sqrq.op = FW_RI_RES_OP_WRITE;
+
+               /*
+                * eqsize is the number of 64B entries plus the status page size
+                */
+               eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
+                       rdev->hw_queue.t4_eq_status_entries;
+               res->u.sqrq.fetchszm_to_iqid =
+                       /* no host cidx updates */
+                       cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+                       /* don't keep in chip cache */
+                       FW_RI_RES_WR_CPRIO_V(0) |
+                       /* set by uP at ri_init time */
+                       FW_RI_RES_WR_PCIECHN_V(0) |
+                       FW_RI_RES_WR_IQID_V(rcq->cqid));
+               res->u.sqrq.dcaen_to_eqsize =
+                       cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+                       FW_RI_RES_WR_DCACPU_V(0) |
+                       FW_RI_RES_WR_FBMIN_V(2) |
+                       FW_RI_RES_WR_FBMAX_V(3) |
+                       FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+                       FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+                       FW_RI_RES_WR_EQSIZE_V(eqsize));
+               res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
+               res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+       }
 
        c4iw_init_wr_wait(wr_waitp);
        ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
@@ -363,19 +387,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
        return 0;
 free_dma:
-       dma_free_coherent(&(rdev->lldi.pdev->dev),
-                         wq->rq.memsize, wq->rq.queue,
-                         dma_unmap_addr(&wq->rq, mapping));
+       if (need_rq)
+               dma_free_coherent(&rdev->lldi.pdev->dev,
+                                 wq->rq.memsize, wq->rq.queue,
+                                 dma_unmap_addr(&wq->rq, mapping));
 free_sq:
        dealloc_sq(rdev, &wq->sq);
 free_hwaddr:
-       c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+       if (need_rq)
+               c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
 free_sw_rq:
-       kfree(wq->rq.sw_rq);
+       if (need_rq)
+               kfree(wq->rq.sw_rq);
 free_sw_sq:
        kfree(wq->sq.sw_sq);
 free_rq_qid:
-       c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+       if (need_rq)
+               c4iw_put_qpid(rdev, wq->rq.qid, uctx);
 free_sq_qid:
        c4iw_put_qpid(rdev, wq->sq.qid, uctx);
        return ret;
@@ -605,6 +633,20 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
        return 0;
 }
 
+/*
+ * Fill in the scatter/gather portion of a receive WQE destined for an SRQ.
+ *
+ * The SGL is built by build_isgl() from wr->sg_list, bounded by the single
+ * WQE that @wqe points at.  On success *len16 is set to the WQE length in
+ * 16-byte units (receive header plus one fw_ri_sge per SGE).
+ *
+ * Returns 0 on success or the error reported by build_isgl().
+ */
+static int build_srq_recv(union t4_recv_wr *wqe, struct ib_recv_wr *wr,
+                         u8 *len16)
+{
+       int err = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1),
+                            &wqe->recv.isgl, wr->sg_list, wr->num_sge,
+                            NULL);
+
+       if (err)
+               return err;
+
+       *len16 = DIV_ROUND_UP(sizeof(wqe->recv) +
+                             wr->num_sge * sizeof(struct fw_ri_sge), 16);
+       return 0;
+}
+
 static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
                              struct ib_reg_wr *wr, struct c4iw_mr *mhp,
                              u8 *len16)
@@ -721,7 +763,7 @@ static void free_qp_work(struct work_struct *work)
 
        pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
        destroy_qp(&rhp->rdev, &qhp->wq,
-                  ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+                  ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
 
        if (ucontext)
                c4iw_put_ucontext(ucontext);
@@ -1145,6 +1187,89 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        return err;
 }
 
+/*
+ * Park a fully built receive WQE in the SRQ's software pending ring.
+ *
+ * The WQE bytes, the consumer's wr_id and the length (in 16-byte units)
+ * are stored in the slot at srq->pending_pidx, after which the pending
+ * producer state is advanced by t4_srq_produce_pending_wr().
+ */
+static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
+                        u64 wr_id, u8 len16)
+{
+       struct t4_srq_pending_wr *slot;
+
+       slot = &srq->pending_wrs[srq->pending_pidx];
+
+       pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n",
+                __func__, srq->cidx, srq->pidx, srq->wq_pidx,
+                srq->in_use, srq->ooo_count,
+                (unsigned long long)wr_id, srq->pending_cidx,
+                srq->pending_pidx, srq->pending_in_use);
+
+       slot->wr_id = wr_id;
+       slot->len16 = len16;
+       /* len16 counts 16-byte units, so copy len16 * 16 bytes of WQE */
+       memcpy(&slot->wqe, wqe, len16 * 16);
+
+       t4_srq_produce_pending_wr(srq);
+}
+
+/*
+ * Post a chain of receive work requests to a shared receive queue.
+ *
+ * Runs under srq->lock.  Each WR is first built into a local WQE.  It is
+ * then either copied straight into the queue memory, or - when the SRQ
+ * has out-of-order completions outstanding, already has deferred WRs, or
+ * the target sw_rq slot is still marked valid - parked with
+ * defer_srq_wr().  The doorbell is rung once, after the loop, for all
+ * entries that were written directly.
+ *
+ * @ibsrq:  SRQ to post to
+ * @wr:     head of the receive WR chain
+ * @bad_wr: on failure, set to the first WR that was not posted
+ *
+ * Returns 0 on success, -ENOMEM when no slot is available, -EINVAL when
+ * a WR carries more than T4_MAX_RECV_SGE entries, or the error returned
+ * by build_srq_recv().
+ */
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+                      struct ib_recv_wr **bad_wr)
+{
+       union t4_recv_wr *wqe, lwqe;
+       struct c4iw_srq *srq;
+       unsigned long flag;
+       u8 len16 = 0;
+       u16 idx = 0;
+       int err = 0;
+       u32 num_wrs;
+
+       srq = to_c4iw_srq(ibsrq);
+       spin_lock_irqsave(&srq->lock, flag);
+       num_wrs = t4_srq_avail(&srq->wq);
+       if (num_wrs == 0) {
+               spin_unlock_irqrestore(&srq->lock, flag);
+               /*
+                * Nothing was posted: report the head of the chain as
+                * the failing WR, like every other error path below.
+                */
+               *bad_wr = wr;
+               return -ENOMEM;
+       }
+       while (wr) {
+               if (wr->num_sge > T4_MAX_RECV_SGE) {
+                       err = -EINVAL;
+                       *bad_wr = wr;
+                       break;
+               }
+               /* always build into the on-stack WQE, then copy or defer */
+               wqe = &lwqe;
+               if (num_wrs)
+                       err = build_srq_recv(wqe, wr, &len16);
+               else
+                       err = -ENOMEM;
+               if (err) {
+                       *bad_wr = wr;
+                       break;
+               }
+
+               wqe->recv.opcode = FW_RI_RECV_WR;
+               wqe->recv.r1 = 0;
+               wqe->recv.wrid = srq->wq.pidx;
+               wqe->recv.r2[0] = 0;
+               wqe->recv.r2[1] = 0;
+               wqe->recv.r2[2] = 0;
+               wqe->recv.len16 = len16;
+
+               if (srq->wq.ooo_count ||
+                   srq->wq.pending_in_use ||
+                   srq->wq.sw_rq[srq->wq.pidx].valid) {
+                       defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
+               } else {
+                       srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id;
+                       srq->wq.sw_rq[srq->wq.pidx].valid = 1;
+                       c4iw_copy_wr_to_srq(&srq->wq, wqe, len16);
+                       pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n",
+                                __func__, srq->wq.cidx,
+                                srq->wq.pidx, srq->wq.wq_pidx,
+                                srq->wq.in_use,
+                                (unsigned long long)wr->wr_id);
+                       t4_srq_produce(&srq->wq, len16);
+                       /* count the queue entries consumed by this WQE */
+                       idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+               }
+               wr = wr->next;
+               num_wrs--;
+       }
+       /* ring the doorbell only if something was written to the queue */
+       if (idx)
+               t4_ring_srq_db(&srq->wq, idx, len16, wqe);
+       spin_unlock_irqrestore(&srq->lock, flag);
+       return err;
+}
+
 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
                                    u8 *ecode)
 {
@@ -1321,7 +1446,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
                       struct c4iw_cq *schp)
 {
        int count;
-       int rq_flushed, sq_flushed;
+       int rq_flushed = 0, sq_flushed;
        unsigned long flag;
 
        pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
@@ -1340,11 +1465,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
                return;
        }
        qhp->wq.flushed = 1;
-       t4_set_wq_in_error(&qhp->wq);
+       t4_set_wq_in_error(&qhp->wq, 0);
 
        c4iw_flush_hw_cq(rchp, qhp);
-       c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
-       rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+       if (!qhp->srq) {
+               c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
+               rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+       }
 
        if (schp != rchp)
                c4iw_flush_hw_cq(schp, qhp);
@@ -1388,7 +1515,7 @@ static void flush_qp(struct c4iw_qp *qhp)
        schp = to_c4iw_cq(qhp->ibqp.send_cq);
 
        if (qhp->ibqp.uobject) {
-               t4_set_wq_in_error(&qhp->wq);
+               t4_set_wq_in_error(&qhp->wq, 0);
                t4_set_cq_in_error(&rchp->cq);
                spin_lock_irqsave(&rchp->comp_handler_lock, flag);
                (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1517,16 +1644,21 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
        wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
        wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
        wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
-       wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+       if (qhp->srq) {
+               wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ |
+                                                 qhp->srq->idx);
+       } else {
+               wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+               wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
+               wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
+                                                  rhp->rdev.lldi.vr->rq.start);
+       }
        wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
        wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
        wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
        wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
        wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
        wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
-       wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
-       wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
-                                        rhp->rdev.lldi.vr->rq.start);
        if (qhp->attr.mpa_attr.initiator)
                build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
 
@@ -1643,7 +1775,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
        case C4IW_QP_STATE_RTS:
                switch (attrs->next_state) {
                case C4IW_QP_STATE_CLOSING:
-                       t4_set_wq_in_error(&qhp->wq);
+                       t4_set_wq_in_error(&qhp->wq, 0);
                        set_state(qhp, C4IW_QP_STATE_CLOSING);
                        ep = qhp->ep;
                        if (!internal) {
@@ -1656,7 +1788,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                                goto err;
                        break;
                case C4IW_QP_STATE_TERMINATE:
-                       t4_set_wq_in_error(&qhp->wq);
+                       t4_set_wq_in_error(&qhp->wq, 0);
                        set_state(qhp, C4IW_QP_STATE_TERMINATE);
                        qhp->attr.layer_etype = attrs->layer_etype;
                        qhp->attr.ecode = attrs->ecode;
@@ -1673,7 +1805,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                        }
                        break;
                case C4IW_QP_STATE_ERROR:
-                       t4_set_wq_in_error(&qhp->wq);
+                       t4_set_wq_in_error(&qhp->wq, 0);
                        set_state(qhp, C4IW_QP_STATE_ERROR);
                        if (!internal) {
                                abort = 1;
@@ -1819,7 +1951,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
        struct c4iw_cq *schp;
        struct c4iw_cq *rchp;
        struct c4iw_create_qp_resp uresp;
-       unsigned int sqsize, rqsize;
+       unsigned int sqsize, rqsize = 0;
        struct c4iw_ucontext *ucontext;
        int ret;
        struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
@@ -1840,11 +1972,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
        if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
                return ERR_PTR(-EINVAL);
 
-       if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
-               return ERR_PTR(-E2BIG);
-       rqsize = attrs->cap.max_recv_wr + 1;
-       if (rqsize < 8)
-               rqsize = 8;
+       if (!attrs->srq) {
+               if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+                       return ERR_PTR(-E2BIG);
+               rqsize = attrs->cap.max_recv_wr + 1;
+               if (rqsize < 8)
+                       rqsize = 8;
+       }
 
        if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
                return ERR_PTR(-E2BIG);
@@ -1869,19 +2003,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
                (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
                sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
        qhp->wq.sq.flush_cidx = -1;
-       qhp->wq.rq.size = rqsize;
-       qhp->wq.rq.memsize =
-               (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
-               sizeof(*qhp->wq.rq.queue);
+       if (!attrs->srq) {
+               qhp->wq.rq.size = rqsize;
+               qhp->wq.rq.memsize =
+                       (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+                       sizeof(*qhp->wq.rq.queue);
+       }
 
        if (ucontext) {
                qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
-               qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE);
+               if (!attrs->srq)
+                       qhp->wq.rq.memsize =
+                               roundup(qhp->wq.rq.memsize, PAGE_SIZE);
        }
 
        ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
                        ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
-                       qhp->wr_waitp);
+                       qhp->wr_waitp, !attrs->srq);
        if (ret)
                goto err_free_wr_wait;
 
@@ -1894,10 +2032,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
        qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
        qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
        qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
-       qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
        qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
        qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
-       qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+       if (!attrs->srq) {
+               qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+               qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+       }
        qhp->attr.state = C4IW_QP_STATE_IDLE;
        qhp->attr.next_state = C4IW_QP_STATE_IDLE;
        qhp->attr.enable_rdma_read = 1;
@@ -1922,20 +2062,25 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
                        ret = -ENOMEM;
                        goto err_remove_handle;
                }
-               rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
-               if (!rq_key_mm) {
-                       ret = -ENOMEM;
-                       goto err_free_sq_key;
+               if (!attrs->srq) {
+                       rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
+                       if (!rq_key_mm) {
+                               ret = -ENOMEM;
+                               goto err_free_sq_key;
+                       }
                }
                sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
                if (!sq_db_key_mm) {
                        ret = -ENOMEM;
                        goto err_free_rq_key;
                }
-               rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
-               if (!rq_db_key_mm) {
-                       ret = -ENOMEM;
-                       goto err_free_sq_db_key;
+               if (!attrs->srq) {
+                       rq_db_key_mm =
+                               kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
+                       if (!rq_db_key_mm) {
+                               ret = -ENOMEM;
+                               goto err_free_sq_db_key;
+                       }
                }
                if (t4_sq_onchip(&qhp->wq.sq)) {
                        ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
@@ -1951,9 +2096,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
                uresp.sqid = qhp->wq.sq.qid;
                uresp.sq_size = qhp->wq.sq.size;
                uresp.sq_memsize = qhp->wq.sq.memsize;
-               uresp.rqid = qhp->wq.rq.qid;
-               uresp.rq_size = qhp->wq.rq.size;
-               uresp.rq_memsize = qhp->wq.rq.memsize;
+               if (!attrs->srq) {
+                       uresp.rqid = qhp->wq.rq.qid;
+                       uresp.rq_size = qhp->wq.rq.size;
+                       uresp.rq_memsize = qhp->wq.rq.memsize;
+               }
                spin_lock(&ucontext->mmap_lock);
                if (ma_sync_key_mm) {
                        uresp.ma_sync_key = ucontext->key;
@@ -1963,12 +2110,16 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
                }
                uresp.sq_key = ucontext->key;
                ucontext->key += PAGE_SIZE;
-               uresp.rq_key = ucontext->key;
-               ucontext->key += PAGE_SIZE;
+               if (!attrs->srq) {
+                       uresp.rq_key = ucontext->key;
+                       ucontext->key += PAGE_SIZE;
+               }
                uresp.sq_db_gts_key = ucontext->key;
                ucontext->key += PAGE_SIZE;
-               uresp.rq_db_gts_key = ucontext->key;
-               ucontext->key += PAGE_SIZE;
+               if (!attrs->srq) {
+                       uresp.rq_db_gts_key = ucontext->key;
+                       ucontext->key += PAGE_SIZE;
+               }
                spin_unlock(&ucontext->mmap_lock);
                ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
                if (ret)
@@ -1977,18 +2128,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
                sq_key_mm->addr = qhp->wq.sq.phys_addr;
                sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
                insert_mmap(ucontext, sq_key_mm);
-               rq_key_mm->key = uresp.rq_key;
-               rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
-               rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
-               insert_mmap(ucontext, rq_key_mm);
+               if (!attrs->srq) {
+                       rq_key_mm->key = uresp.rq_key;
+                       rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
+                       rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+                       insert_mmap(ucontext, rq_key_mm);
+               }
                sq_db_key_mm->key = uresp.sq_db_gts_key;
                sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
                sq_db_key_mm->len = PAGE_SIZE;
                insert_mmap(ucontext, sq_db_key_mm);
-               rq_db_key_mm->key = uresp.rq_db_gts_key;
-               rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa;
-               rq_db_key_mm->len = PAGE_SIZE;
-               insert_mmap(ucontext, rq_db_key_mm);
+               if (!attrs->srq) {
+                       rq_db_key_mm->key = uresp.rq_db_gts_key;
+                       rq_db_key_mm->addr =
+                               (u64)(unsigned long)qhp->wq.rq.bar2_pa;
+                       rq_db_key_mm->len = PAGE_SIZE;
+                       insert_mmap(ucontext, rq_db_key_mm);
+               }
                if (ma_sync_key_mm) {
                        ma_sync_key_mm->key = uresp.ma_sync_key;
                        ma_sync_key_mm->addr =
@@ -2001,7 +2157,19 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
                c4iw_get_ucontext(ucontext);
                qhp->ucontext = ucontext;
        }
+       if (!attrs->srq) {
+               qhp->wq.qp_errp =
+                       &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err;
+       } else {
+               qhp->wq.qp_errp =
+                       &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err;
+               qhp->wq.srqidxp =
+                       &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx;
+       }
+
        qhp->ibqp.qp_num = qhp->wq.sq.qid;
+       if (attrs->srq)
+               qhp->srq = to_c4iw_srq(attrs->srq);
        INIT_LIST_HEAD(&qhp->db_fc_entry);
        pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
                 qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
@@ -2011,18 +2179,20 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 err_free_ma_sync_key:
        kfree(ma_sync_key_mm);
 err_free_rq_db_key:
-       kfree(rq_db_key_mm);
+       if (!attrs->srq)
+               kfree(rq_db_key_mm);
 err_free_sq_db_key:
        kfree(sq_db_key_mm);
 err_free_rq_key:
-       kfree(rq_key_mm);
+       if (!attrs->srq)
+               kfree(rq_key_mm);
 err_free_sq_key:
        kfree(sq_key_mm);
 err_remove_handle:
        remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
 err_destroy_qp:
        destroy_qp(&rhp->rdev, &qhp->wq,
-                  ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+                  ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
 err_free_wr_wait:
        c4iw_put_wr_wait(qhp->wr_waitp);
 err_free_qhp:
@@ -2088,6 +2258,45 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
        return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
 }
 
+/*
+ * Raise an IB_EVENT_SRQ_LIMIT_REACHED event for @srq on its owning
+ * device by way of ib_dispatch_event().
+ */
+void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq)
+{
+       struct ib_event limit_ev = {
+               .device = &srq->rhp->ibdev,
+               .element.srq = &srq->ibsrq,
+               .event = IB_EVENT_SRQ_LIMIT_REACHED,
+       };
+
+       ib_dispatch_event(&limit_ev);
+}
+
+/*
+ * Modify SRQ attributes.
+ *
+ * - A call carrying udata with an empty attribute mask is treated as a
+ *   software trigger for the SRQ-limit-reached event.
+ * - Resizing (IB_SRQ_MAX_WR) is not supported and yields -EINVAL.
+ * - For callers without udata, IB_SRQ_LIMIT arms the SRQ and records
+ *   the requested limit.
+ *
+ * Returns 0 on success or -EINVAL for an unsupported request.
+ */
+int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
+                   enum ib_srq_attr_mask srq_attr_mask,
+                   struct ib_udata *udata)
+{
+       struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
+
+       /*
+        * XXX 0 mask == a SW interrupt for srq_limit reached...
+        */
+       if (udata && !srq_attr_mask) {
+               c4iw_dispatch_srq_limit_reached_event(srq);
+               return 0;
+       }
+
+       /* no support for this yet */
+       if (srq_attr_mask & IB_SRQ_MAX_WR)
+               return -EINVAL;
+
+       if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) {
+               srq->armed = true;
+               srq->srq_limit = attr->srq_limit;
+       }
+
+       return 0;
+}
+
 int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                     int attr_mask, struct ib_qp_init_attr *init_attr)
 {
@@ -2104,3 +2313,358 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
        return 0;
 }
+
+/*
+ * Tear down the queue resources of an SRQ.
+ *
+ * Builds a FW_RI_RES_WR carrying a single SRQ resource with op RESET in
+ * the pre-allocated srq->destroy_skb, hands it to c4iw_ref_send_wait(),
+ * and then releases everything alloc_srq_queue() set up: the DMA queue
+ * memory, the RQT region, the software shadow arrays and the queue id.
+ *
+ * @srq:      SRQ whose queue is being released
+ * @uctx:     device ucontext the queue id was taken from
+ * @wr_waitp: wait object used for the firmware work request
+ */
+static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+                          struct c4iw_wr_wait *wr_waitp)
+{
+       struct c4iw_rdev *rdev = &srq->rhp->rdev;
+       struct sk_buff *skb = srq->destroy_skb;
+       struct t4_srq *wq = &srq->wq;
+       struct fw_ri_res_wr *res_wr;
+       struct fw_ri_res *res;
+       int wr_len;
+
+       wr_len = sizeof(*res_wr) + sizeof(*res);
+       set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+       res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+       memset(res_wr, 0, wr_len);
+       res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+                       FW_RI_RES_WR_NRES_V(1) |
+                       FW_WR_COMPL_F);
+       res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+       res_wr->cookie = (uintptr_t)wr_waitp;
+       res = res_wr->res;
+       res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+       res->u.srq.op = FW_RI_RES_OP_RESET;
+       res->u.srq.srqid = cpu_to_be32(srq->idx);
+       res->u.srq.eqid = cpu_to_be32(wq->qid);
+
+       c4iw_init_wr_wait(wr_waitp);
+       c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
+
+       dma_free_coherent(&rdev->lldi.pdev->dev,
+                         wq->memsize, wq->queue,
+                       pci_unmap_addr(wq, mapping));
+       c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+       /*
+        * Both shadow arrays are allocated only for kernel-mode SRQs;
+        * for user-mode SRQs the pointers are never assigned and
+        * kfree(NULL) is a no-op.
+        */
+       kfree(wq->pending_wrs);
+       kfree(wq->sw_rq);
+       c4iw_put_qpid(rdev, wq->qid, uctx);
+}
+
+/*
+ * Allocate and register the queue resources of an SRQ.
+ *
+ * In order: a queue id, (kernel mode only) the sw_rq and pending_wrs
+ * shadow arrays, an RQT region, and the DMA-coherent queue memory.  A
+ * FW_RI_RES_WR carrying a single SRQ resource with op WRITE is then
+ * built and handed to c4iw_ref_send_wait().
+ *
+ * @srq:      SRQ being set up; wq.size and wq.memsize are read here and
+ *            must already be filled in
+ * @uctx:     device ucontext to take the queue id from; anything other
+ *            than &rdev->uctx is treated as a user-mode SRQ
+ * @wr_waitp: wait object used for the firmware work request
+ *
+ * Returns 0 on success.  On failure everything acquired so far is
+ * released and a negative errno is returned.
+ */
+static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+                          struct c4iw_wr_wait *wr_waitp)
+{
+       struct c4iw_rdev *rdev = &srq->rhp->rdev;
+       int user = (uctx != &rdev->uctx);
+       struct t4_srq *wq = &srq->wq;
+       struct fw_ri_res_wr *res_wr;
+       struct fw_ri_res *res;
+       struct sk_buff *skb;
+       int wr_len;
+       int eqsize;
+       /* default for the allocation failures below, which do not set ret */
+       int ret = -ENOMEM;
+
+       wq->qid = c4iw_get_qpid(rdev, uctx);
+       if (!wq->qid)
+               goto err;
+
+       /* the software shadow state is only kept for kernel-mode SRQs */
+       if (!user) {
+               wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq),
+                                   GFP_KERNEL);
+               if (!wq->sw_rq)
+                       goto err_put_qpid;
+               wq->pending_wrs = kcalloc(srq->wq.size,
+                                         sizeof(*srq->wq.pending_wrs),
+                                         GFP_KERNEL);
+               if (!wq->pending_wrs)
+                       goto err_free_sw_rq;
+       }
+
+       wq->rqt_size = wq->size;
+       wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size);
+       if (!wq->rqt_hwaddr)
+               goto err_free_pending_wrs;
+       /* index of the RQT region relative to the start of the RQ range */
+       wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
+               T4_RQT_ENTRY_SHIFT;
+
+       wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+                                      wq->memsize, &wq->dma_addr,
+                       GFP_KERNEL);
+       if (!wq->queue)
+               goto err_free_rqtpool;
+
+       memset(wq->queue, 0, wq->memsize);
+       pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+
+       /* the BAR2 physical address is only requested for user-mode SRQs */
+       wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
+                                     &wq->bar2_qid,
+                       user ? &wq->bar2_pa : NULL);
+
+       /*
+        * User mode must have bar2 access.
+        */
+
+       if (user && !wq->bar2_va) {
+               pr_warn(MOD "%s: srqid %u not in BAR2 range.\n",
+                       pci_name(rdev->lldi.pdev), wq->qid);
+               ret = -EINVAL;
+               goto err_free_queue;
+       }
+
+       /* build fw_ri_res_wr */
+       wr_len = sizeof(*res_wr) + sizeof(*res);
+
+       /*
+        * NOTE(review): __GFP_NOFAIL is requested and the result is
+        * still checked for NULL; one of the two looks redundant -
+        * confirm which behaviour is intended.
+        */
+       skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+       if (!skb)
+               goto err_free_queue;
+       set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+       res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+       memset(res_wr, 0, wr_len);
+       res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+                       FW_RI_RES_WR_NRES_V(1) |
+                       FW_WR_COMPL_F);
+       res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+       res_wr->cookie = (uintptr_t)wr_waitp;
+       res = res_wr->res;
+       res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+       res->u.srq.op = FW_RI_RES_OP_WRITE;
+
+       /*
+        * eqsize is the number of 64B entries plus the status page size.
+        */
+       eqsize = wq->size * T4_RQ_NUM_SLOTS +
+               rdev->hw_queue.t4_eq_status_entries;
+       res->u.srq.eqid = cpu_to_be32(wq->qid);
+       res->u.srq.fetchszm_to_iqid =
+                                               /* no host cidx updates */
+               cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+               FW_RI_RES_WR_CPRIO_V(0) |       /* don't keep in chip cache */
+               FW_RI_RES_WR_PCIECHN_V(0) |     /* set by uP at ri_init time */
+               FW_RI_RES_WR_FETCHRO_V(0));     /* relaxed_ordering */
+       res->u.srq.dcaen_to_eqsize =
+               cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+               FW_RI_RES_WR_DCACPU_V(0) |
+               FW_RI_RES_WR_FBMIN_V(2) |
+               FW_RI_RES_WR_FBMAX_V(3) |
+               FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+               FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+               FW_RI_RES_WR_EQSIZE_V(eqsize));
+       res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr);
+       res->u.srq.srqid = cpu_to_be32(srq->idx);
+       res->u.srq.pdid = cpu_to_be32(srq->pdid);
+       res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size);
+       /* the RQT address is passed as an offset into the RQ range */
+       res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr -
+                       rdev->lldi.vr->rq.start);
+
+       c4iw_init_wr_wait(wr_waitp);
+
+       ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__);
+       if (ret)
+               goto err_free_queue;
+
+       pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n"
+                       " bar2_addr %p rqt addr 0x%x size %d\n",
+                       __func__, srq->idx, wq->qid, srq->pdid, wq->queue,
+                       (u64)virt_to_phys(wq->queue), wq->bar2_va,
+                       wq->rqt_hwaddr, wq->rqt_size);
+
+       return 0;
+       /* unwind in reverse order of acquisition */
+err_free_queue:
+       dma_free_coherent(&rdev->lldi.pdev->dev,
+                         wq->memsize, wq->queue,
+                       pci_unmap_addr(wq, mapping));
+err_free_rqtpool:
+       c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+err_free_pending_wrs:
+       if (!user)
+               kfree(wq->pending_wrs);
+err_free_sw_rq:
+       if (!user)
+               kfree(wq->sw_rq);
+err_put_qpid:
+       c4iw_put_qpid(rdev, wq->qid, uctx);
+err:
+       return ret;
+}
+
+/*
+ * Copy a receive WQE into the SRQ queue memory, 64 bits at a time.
+ *
+ * Writing starts at the entry addressed by srq->wq_pidx and wraps back
+ * to the start of the queue when the end of the srq->size entries is
+ * reached.  @len16 is the WQE length in 16-byte units, i.e. two 64-bit
+ * words are copied per unit.
+ */
+void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16)
+{
+       u64 *ring_start = (u64 *)srq->queue;
+       u64 *ring_end = (u64 *)&srq->queue[srq->size];
+       u64 *from = (u64 *)wqe;
+       unsigned int words;
+       u64 *to;
+
+       to = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE);
+       for (words = 2 * len16; words; words--) {
+               *to++ = *from++;
+               if (to >= ring_end)
+                       to = ring_start;
+       }
+}
+
+/*
+ * Create a shared receive queue.
+ *
+ * Validates the requested size against the device limits, rounds the
+ * queue depth up to a power of two (minimum 16), allocates the SRQ
+ * object, its wait object, an SRQ index and the skb later used to
+ * destroy the queue, and then sets up the hardware queue with
+ * alloc_srq_queue().  For user-mode callers (@udata set) the queue and
+ * doorbell regions are published through two mmap keys returned in the
+ * create response.
+ *
+ * @pd:    protection domain the SRQ belongs to
+ * @attrs: requested attributes; attr.max_wr is updated to the depth
+ *         actually provided
+ * @udata: user-space request/response buffer, NULL for kernel callers
+ *
+ * Returns the new ib_srq or an ERR_PTR() encoded errno.
+ */
+struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
+                              struct ib_udata *udata)
+{
+       struct c4iw_dev *rhp;
+       struct c4iw_srq *srq;
+       struct c4iw_pd *php;
+       struct c4iw_create_srq_resp uresp;
+       struct c4iw_ucontext *ucontext;
+       struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
+       int rqsize;
+       int ret;
+       int wr_len;
+
+       pr_debug("%s ib_pd %p\n", __func__, pd);
+
+       php = to_c4iw_pd(pd);
+       rhp = php->rhp;
+
+       /* no SRQ region configured on this adapter */
+       if (!rhp->rdev.lldi.vr->srq.size)
+               return ERR_PTR(-EINVAL);
+       if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+               return ERR_PTR(-E2BIG);
+       if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
+               return ERR_PTR(-E2BIG);
+
+       /*
+        * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
+        */
+       rqsize = attrs->attr.max_wr + 1;
+       rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
+
+       ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+
+       srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+       if (!srq)
+               return ERR_PTR(-ENOMEM);
+
+       srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+       if (!srq->wr_waitp) {
+               ret = -ENOMEM;
+               goto err_free_srq;
+       }
+
+       srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
+       if (srq->idx < 0) {
+               ret = -ENOMEM;
+               goto err_free_wr_wait;
+       }
+
+       /* pre-allocate the skb free_srq_queue() uses, so destroy cannot fail */
+       wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
+       srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
+       if (!srq->destroy_skb) {
+               ret = -ENOMEM;
+               goto err_free_srq_idx;
+       }
+
+       srq->rhp = rhp;
+       srq->pdid = php->pdid;
+
+       srq->wq.size = rqsize;
+       srq->wq.memsize =
+               (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+               sizeof(*srq->wq.queue);
+       /* user-mode queues are mmapped, so size them in whole pages */
+       if (ucontext)
+               srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE);
+
+       ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx :
+                       &rhp->rdev.uctx, srq->wr_waitp);
+       if (ret)
+               goto err_free_skb;
+       attrs->attr.max_wr = rqsize - 1;
+
+       if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
+               srq->flags = T4_SRQ_LIMIT_SUPPORT;
+
+       ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid);
+       if (ret)
+               goto err_free_queue;
+
+       if (udata) {
+               srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
+               if (!srq_key_mm) {
+                       ret = -ENOMEM;
+                       goto err_remove_handle;
+               }
+               srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
+               if (!srq_db_key_mm) {
+                       ret = -ENOMEM;
+                       goto err_free_srq_key_mm;
+               }
+               /*
+                * uresp lives on the stack and is copied to user space
+                * as a whole; zero it so that any field or padding not
+                * assigned below cannot leak kernel stack contents.
+                */
+               memset(&uresp, 0, sizeof(uresp));
+               uresp.flags = srq->flags;
+               uresp.qid_mask = rhp->rdev.qpmask;
+               uresp.srqid = srq->wq.qid;
+               uresp.srq_size = srq->wq.size;
+               uresp.srq_memsize = srq->wq.memsize;
+               uresp.rqt_abs_idx = srq->wq.rqt_abs_idx;
+               /* reserve two consecutive mmap keys: queue and doorbell */
+               spin_lock(&ucontext->mmap_lock);
+               uresp.srq_key = ucontext->key;
+               ucontext->key += PAGE_SIZE;
+               uresp.srq_db_gts_key = ucontext->key;
+               ucontext->key += PAGE_SIZE;
+               spin_unlock(&ucontext->mmap_lock);
+               ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+               if (ret)
+                       goto err_free_srq_db_key_mm;
+               srq_key_mm->key = uresp.srq_key;
+               srq_key_mm->addr = virt_to_phys(srq->wq.queue);
+               srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
+               insert_mmap(ucontext, srq_key_mm);
+               srq_db_key_mm->key = uresp.srq_db_gts_key;
+               srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
+               srq_db_key_mm->len = PAGE_SIZE;
+               insert_mmap(ucontext, srq_db_key_mm);
+       }
+
+       pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n",
+                __func__, srq->wq.qid, srq->idx, srq->wq.size,
+                       (unsigned long)srq->wq.memsize, attrs->attr.max_wr);
+
+       spin_lock_init(&srq->lock);
+       return &srq->ibsrq;
+err_free_srq_db_key_mm:
+       kfree(srq_db_key_mm);
+err_free_srq_key_mm:
+       kfree(srq_key_mm);
+err_remove_handle:
+       remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
+err_free_queue:
+       free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+                      srq->wr_waitp);
+err_free_skb:
+       /*
+        * NOTE(review): when reached via err_free_queue, destroy_skb has
+        * already been handed to c4iw_ref_send_wait() by free_srq_queue().
+        * Confirm the send path does not consume the skb; if it does,
+        * this kfree_skb() is a double free.
+        */
+       if (srq->destroy_skb)
+               kfree_skb(srq->destroy_skb);
+err_free_srq_idx:
+       c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+err_free_wr_wait:
+       c4iw_put_wr_wait(srq->wr_waitp);
+err_free_srq:
+       kfree(srq);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Destroy a shared receive queue.
+ *
+ * Removes the SRQ from the device's qpidr handle table, releases its
+ * queue resources through free_srq_queue() (using the owner's ucontext
+ * for user-mode SRQs, the device-wide one otherwise), then gives back
+ * the SRQ index and wait object and frees the SRQ itself.
+ *
+ * Always returns 0.
+ */
+int c4iw_destroy_srq(struct ib_srq *ibsrq)
+{
+       struct c4iw_srq *srq = to_c4iw_srq(ibsrq);
+       struct c4iw_dev *rhp = srq->rhp;
+       struct c4iw_ucontext *ucontext = NULL;
+       struct c4iw_dev_ucontext *uctx;
+
+       pr_debug("%s id %d\n", __func__, srq->wq.qid);
+
+       remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
+
+       if (ibsrq->uobject)
+               ucontext = to_c4iw_ucontext(ibsrq->uobject->context);
+       uctx = ucontext ? &ucontext->uctx : &rhp->rdev.uctx;
+
+       free_srq_queue(srq, uctx, srq->wr_waitp);
+       c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+       c4iw_put_wr_wait(srq->wr_waitp);
+       kfree(srq);
+
+       return 0;
+}
index 0ef25ae05e6fee35e4a7baf3f48181ee37f6deb5..57ed26b3cc21a6b244af622252fa1646eeeb6edd 100644 (file)
@@ -53,7 +53,8 @@ static int c4iw_init_qid_table(struct c4iw_rdev *rdev)
 }
 
 /* nr_* must be power of 2 */
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+                      u32 nr_pdid, u32 nr_srqt)
 {
        int err = 0;
        err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1,
@@ -67,7 +68,17 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
                                        nr_pdid, 1, 0);
        if (err)
                goto pdid_err;
+       if (!nr_srqt)
+               err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+                                         1, 1, 0);
+       else
+               err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+                                         nr_srqt, 0, 0);
+       if (err)
+               goto srq_err;
        return 0;
+ srq_err:
+       c4iw_id_table_free(&rdev->resource.pdid_table);
  pdid_err:
        c4iw_id_table_free(&rdev->resource.qid_table);
  qid_err:
@@ -371,13 +382,21 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
 int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
 {
        unsigned rqt_start, rqt_chunk, rqt_top;
+       int skip = 0;
 
        rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
        if (!rdev->rqt_pool)
                return -ENOMEM;
 
-       rqt_start = rdev->lldi.vr->rq.start;
-       rqt_chunk = rdev->lldi.vr->rq.size;
+       /*
+        * If SRQs are supported, then never use the first RQE from
+        * the RQT region. This is because HW uses RQT index 0 as NULL.
+        */
+       if (rdev->lldi.vr->srq.size)
+               skip = T4_RQT_ENTRY_SIZE;
+
+       rqt_start = rdev->lldi.vr->rq.start + skip;
+       rqt_chunk = rdev->lldi.vr->rq.size - skip;
        rqt_top = rqt_start + rqt_chunk;
 
        while (rqt_start < rqt_top) {
@@ -405,6 +424,32 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
        kref_put(&rdev->rqt_kref, destroy_rqtpool);
 }
 
+/*
+ * Take an index from the SRQ id table, keeping the srqt counters (cur,
+ * max, fail) up to date under stats.lock.  Returns the index, or
+ * -ENOMEM when c4iw_id_alloc() reports -1.
+ */
+int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev)
+{
+       int ret = c4iw_id_alloc(&rdev->resource.srq_table);
+
+       mutex_lock(&rdev->stats.lock);
+       if (ret != -1) {
+               rdev->stats.srqt.cur++;
+               if (rdev->stats.srqt.cur > rdev->stats.srqt.max)
+                       rdev->stats.srqt.max = rdev->stats.srqt.cur;
+       } else {
+               rdev->stats.srqt.fail++;
+               ret = -ENOMEM;
+       }
+       mutex_unlock(&rdev->stats.lock);
+       return ret;
+}
+
+/*
+ * Hand @idx back to the SRQ id table, then decrement srqt.cur while
+ * holding stats.lock.
+ */
+void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx)
+{
+       struct mutex *stats_lock = &rdev->stats.lock;
+
+       c4iw_id_free(&rdev->resource.srq_table, idx);
+
+       mutex_lock(stats_lock);
+       rdev->stats.srqt.cur--;
+       mutex_unlock(stats_lock);
+}
+
 /*
  * On-Chip QP Memory.
  */
index 29a4dd5053f29b531a9a524508c7250c70379a6e..11d55fc2ded7b18c95c5caa89f4e21987534881e 100644 (file)
@@ -491,7 +491,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
 static inline void t4_rq_consume(struct t4_wq *wq)
 {
        wq->rq.in_use--;
-       wq->rq.msn++;
        if (++wq->rq.cidx == wq->rq.size)
                wq->rq.cidx = 0;
 }
@@ -641,12 +640,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
 
 static inline int t4_wq_in_error(struct t4_wq *wq)
 {
-       return wq->rq.queue[wq->rq.size].status.qp_err;
+       return *wq->qp_errp;    /* the flag t4_set_wq_in_error() sets to 1 */
 }
 
-static inline void t4_set_wq_in_error(struct t4_wq *wq)
+static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx)
 {
-       wq->rq.queue[wq->rq.size].status.qp_err = 1;
+       if (srqidx)     /* a zero srqidx leaves *srqidxp untouched */
+               *wq->srqidxp = srqidx;
+       *wq->qp_errp = 1;       /* read back by t4_wq_in_error() */
 }
 
 static inline void t4_disable_wq_db(struct t4_wq *wq)