IB/{hfi1, qib, rdmavt}: Schedule multi RC/UC packets instead of posting
author Michael J. Ruhl <michael.j.ruhl@intel.com>
Mon, 10 Sep 2018 16:49:27 +0000 (09:49 -0700)
committer Jason Gunthorpe <jgg@mellanox.com>
Tue, 11 Sep 2018 15:55:02 +0000 (09:55 -0600)
The post_send() path determines whether it should post directly or schedule
the post for later.  The current logic is:

  if the swqe ring is empty or (for hfi1) wqe->length <= piothreshold
    post the send
  else
    schedule

This can allow large requests to call the send engine directly.  Large
requests can potentially produce a large number of packets prior to
returning to the caller, blocking the caller from posting more requests
and preventing better parallel processing.

Allow the driver(s) more say in this logic (pass call_send to the driver,
rather than examining a return value).

Update hfi1/qib logic to schedule the send engine if an RC or UC message
is larger than the QP MTU size.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/verbs.h
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/sw/rdmavt/qp.c
include/rdma/rdma_vt.h

index 9b1e84a6b1ccb67e3ffae2eaf7603ae73d7ac03a..54d9ff1710597eaf405c428197d09f4205eb3b7b 100644 (file)
@@ -285,17 +285,13 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
  * hfi1_check_send_wqe - validate wqe
  * @qp - The qp
  * @wqe - The built wqe
- *
- * validate wqe.  This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * @call_send - Determine if the send should be posted or scheduled.
  *
  * Returns 0 on success, -EINVAL on failure
  *
  */
 int hfi1_check_send_wqe(struct rvt_qp *qp,
-                       struct rvt_swqe *wqe)
+                       struct rvt_swqe *wqe, bool *call_send)
 {
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct rvt_ah *ah;
@@ -305,6 +301,8 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
        case IB_QPT_UC:
                if (wqe->length > 0x80000000U)
                        return -EINVAL;
+               if (wqe->length > qp->pmtu)
+                       *call_send = false;
                break;
        case IB_QPT_SMI:
                ah = ibah_to_rvtah(wqe->ud_wr.ah);
@@ -321,7 +319,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
        default:
                break;
        }
-       return wqe->length <= piothreshold;
+       return 0;
 }
 
 /**
index a4d06502f06df61d34611ee09bc888af80fcc04c..269ec338581b90794af2f10b5f4b45b02e8928d4 100644 (file)
@@ -343,7 +343,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_udata *udata);
 void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+                       bool *call_send);
 
 extern const u32 rc_only_opcode;
 extern const u32 uc_only_opcode;
index 344e401915f7c92d69e9b06da70789fedcf00740..a81905df2d0f3d0815c1c53761ee8b7d75b4e62e 100644 (file)
@@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
  * qib_check_send_wqe - validate wr/wqe
  * @qp - The qp
  * @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled
  *
- * validate wr/wqe.  This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
- *
- * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ * Returns 0 on success, -EINVAL on failure
  */
 int qib_check_send_wqe(struct rvt_qp *qp,
-                      struct rvt_swqe *wqe)
+                      struct rvt_swqe *wqe, bool *call_send)
 {
        struct rvt_ah *ah;
-       int ret = 0;
 
        switch (qp->ibqp.qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
                if (wqe->length > 0x80000000U)
                        return -EINVAL;
+               if (wqe->length > qp->pmtu)
+                       *call_send = false;
                break;
        case IB_QPT_SMI:
        case IB_QPT_GSI:
@@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp,
                if (wqe->length > (1 << ah->log_pmtu))
                        return -EINVAL;
                /* progress hint */
-               ret = 1;
+               *call_send = true;
                break;
        default:
                break;
        }
-       return ret;
+       return 0;
 }
 
 #ifdef CONFIG_DEBUG_FS
index 666613eef88fb1040a7482e93630807195b47d6f..3d7b744ae8fb3e30c4c814e0bf48ca0bd49c7911 100644 (file)
@@ -303,7 +303,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 
 int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
 
-int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+                      bool *call_send);
 
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
 
index 5ce403c6cddba43e00aa123575abf38d79a51a59..a9b7d7ff32ee8f49854d73495e3d8560a21ff5c7 100644 (file)
@@ -1718,7 +1718,7 @@ static inline int rvt_qp_is_avail(
  */
 static int rvt_post_one_wr(struct rvt_qp *qp,
                           const struct ib_send_wr *wr,
-                          int *call_send)
+                          bool *call_send)
 {
        struct rvt_swqe *wqe;
        u32 next;
@@ -1825,11 +1825,9 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
 
        /* general part of wqe valid - allow for driver checks */
        if (rdi->driver_f.check_send_wqe) {
-               ret = rdi->driver_f.check_send_wqe(qp, wqe);
+               ret = rdi->driver_f.check_send_wqe(qp, wqe, call_send);
                if (ret < 0)
                        goto bail_inval_free;
-               if (ret)
-                       *call_send = ret;
        }
 
        log_pmtu = qp->log_pmtu;
@@ -1897,7 +1895,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
        unsigned long flags = 0;
-       int call_send;
+       bool call_send;
        unsigned nreq = 0;
        int err = 0;
 
@@ -1930,7 +1928,11 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 bail:
        spin_unlock_irqrestore(&qp->s_hlock, flags);
        if (nreq) {
-               if (call_send)
+               /*
+                * Only call do_send if there is exactly one packet, and the
+                * driver said it was ok.
+                */
+               if (nreq == 1 && call_send)
                        rdi->driver_f.do_send(qp);
                else
                        rdi->driver_f.schedule_send_no_lock(qp);
index e79229a0cf0147a8d86dfa9f48d02f972c454187..e32facdd9fd3d0357335e6fb86b9d012c8615117 100644 (file)
@@ -214,8 +214,14 @@ struct rvt_driver_provided {
        void (*schedule_send)(struct rvt_qp *qp);
        void (*schedule_send_no_lock)(struct rvt_qp *qp);
 
-       /* Driver specific work request checking */
-       int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+       /*
+        * Validate the wqe.  This needs to be done prior to inserting the
+        * wqe into the ring, but after the wqe has been set up.  Allow for
+        * driver specific work request checking by providing a callback.
+        * call_send indicates if the wqe should be posted or scheduled.
+        */
+       int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+                             bool *call_send);
 
        /*
         * Sometimes rdmavt needs to kick the driver's send progress. That is