net/smc: preallocated memory for rdma work requests
author Ursula Braun <ubraun@linux.ibm.com>
Mon, 4 Feb 2019 12:44:44 +0000 (13:44 +0100)
committer David S. Miller <davem@davemloft.net>
Mon, 4 Feb 2019 17:11:19 +0000 (09:11 -0800)
The work requests for RDMA writes are built in local variables within
function smc_tx_rdma_write(). This violates the rule that the work
request storage has to remain valid until the work request is confirmed
by a completion queue response.
This patch introduces preallocated memory for these work requests.
The storage is allocated once a link (and thus a queue pair) is
established.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/smc/smc_cdc.c
net/smc/smc_cdc.h
net/smc/smc_core.h
net/smc/smc_llc.c
net/smc/smc_tx.c
net/smc/smc_wr.c
net/smc/smc_wr.h

index 1c5333d494e9a352c582d0793c282f26809a21a3..b80ef104ab4e525a9cb1a0a5bf37e9ae0d53439f 100644 (file)
 
 /********************************** send *************************************/
 
-struct smc_cdc_tx_pend {
-       struct smc_connection   *conn;          /* socket connection */
-       union smc_host_cursor   cursor; /* tx sndbuf cursor sent */
-       union smc_host_cursor   p_cursor;       /* rx RMBE cursor produced */
-       u16                     ctrl_seq;       /* conn. tx sequence # */
-};
-
 /* handler for send/transmission completion of a CDC msg */
 static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
                               struct smc_link *link,
@@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
                          struct smc_wr_buf **wr_buf,
+                         struct smc_rdma_wr **wr_rdma_buf,
                          struct smc_cdc_tx_pend **pend)
 {
        struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
        int rc;
 
        rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+                                    wr_rdma_buf,
                                     (struct smc_wr_tx_pend_priv **)pend);
        if (!conn->alert_token_local)
                /* abnormal termination */
@@ -121,7 +116,7 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
        struct smc_wr_buf *wr_buf;
        int rc;
 
-       rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
+       rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);
        if (rc)
                return rc;
 
index b5bfe38c7f9b6a87258adc0aecce58c31c2a164a..2148da7a26b17a34ce401994945458bc1222fd35 100644 (file)
@@ -270,10 +270,16 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
                smcr_cdc_msg_to_host(local, peer, conn);
 }
 
-struct smc_cdc_tx_pend;
+struct smc_cdc_tx_pend {
+       struct smc_connection   *conn;          /* socket connection */
+       union smc_host_cursor   cursor;         /* tx sndbuf cursor sent */
+       union smc_host_cursor   p_cursor;       /* rx RMBE cursor produced */
+       u16                     ctrl_seq;       /* conn. tx sequence # */
+};
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
                          struct smc_wr_buf **wr_buf,
+                         struct smc_rdma_wr **wr_rdma_buf,
                          struct smc_cdc_tx_pend **pend);
 void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
 int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
index b00287989a3dea43975ce728df8a9e2885656b71..8806d2afa6edb1178f4ad2904aa1346ee42bcae6 100644 (file)
@@ -52,6 +52,24 @@ enum smc_wr_reg_state {
        FAILED          /* ib_wr_reg_mr response: failure */
 };
 
+struct smc_rdma_sge {                          /* sges for RDMA writes */
+       struct ib_sge           wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE];
+};
+
+#define SMC_MAX_RDMA_WRITES    2               /* max. # of RDMA writes per
+                                                * message send
+                                                */
+
+struct smc_rdma_sges {                         /* sges per message send */
+       struct smc_rdma_sge     tx_rdma_sge[SMC_MAX_RDMA_WRITES];
+};
+
+struct smc_rdma_wr {                           /* work requests per message
+                                                * send
+                                                */
+       struct ib_rdma_wr       wr_tx_rdma[SMC_MAX_RDMA_WRITES];
+};
+
 struct smc_link {
        struct smc_ib_device    *smcibdev;      /* ib-device */
        u8                      ibport;         /* port - values 1 | 2 */
@@ -64,6 +82,8 @@ struct smc_link {
        struct smc_wr_buf       *wr_tx_bufs;    /* WR send payload buffers */
        struct ib_send_wr       *wr_tx_ibs;     /* WR send meta data */
        struct ib_sge           *wr_tx_sges;    /* WR send gather meta data */
+       struct smc_rdma_sges    *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/
+       struct smc_rdma_wr      *wr_tx_rdmas;   /* WR RDMA WRITE */
        struct smc_wr_tx_pend   *wr_tx_pends;   /* WR send waiting for CQE */
        /* above four vectors have wr_tx_cnt elements and use the same index */
        dma_addr_t              wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
index a6d3623d06f422073995b5c4e42d31b46088b466..4fd60c522802949e8b0f6931f911fc445765873b 100644 (file)
@@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link,
 {
        int rc;
 
-       rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend);
+       rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
+                                    pend);
        if (rc < 0)
                return rc;
        BUILD_BUG_ON_MSG(
index 36af3de731b9d0c8099b4250360b6c66160eb935..2fdfaff60cf9c9d7d589f58429083190a083e650 100644 (file)
@@ -266,27 +266,23 @@ int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
 
 /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
 static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
-                            int num_sges, struct ib_sge sges[])
+                            int num_sges, struct ib_rdma_wr *rdma_wr)
 {
        struct smc_link_group *lgr = conn->lgr;
-       struct ib_rdma_wr rdma_wr;
        struct smc_link *link;
        int rc;
 
-       memset(&rdma_wr, 0, sizeof(rdma_wr));
        link = &lgr->lnk[SMC_SINGLE_LINK];
-       rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link);
-       rdma_wr.wr.sg_list = sges;
-       rdma_wr.wr.num_sge = num_sges;
-       rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
-       rdma_wr.remote_addr =
+       rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
+       rdma_wr->wr.num_sge = num_sges;
+       rdma_wr->remote_addr =
                lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
                /* RMBE within RMB */
                conn->tx_off +
                /* offset within RMBE */
                peer_rmbe_offset;
-       rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
-       rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);
+       rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
+       rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
        if (rc) {
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
                smc_lgr_terminate(lgr);
@@ -313,24 +309,25 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
 /* SMC-R helper for smc_tx_rdma_writes() */
 static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
                               size_t src_off, size_t src_len,
-                              size_t dst_off, size_t dst_len)
+                              size_t dst_off, size_t dst_len,
+                              struct smc_rdma_wr *wr_rdma_buf)
 {
        dma_addr_t dma_addr =
                sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
-       struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
        int src_len_sum = src_len, dst_len_sum = dst_len;
-       struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
        int sent_count = src_off;
        int srcchunk, dstchunk;
        int num_sges;
        int rc;
 
        for (dstchunk = 0; dstchunk < 2; dstchunk++) {
+               struct ib_sge *sge =
+                       wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
+
                num_sges = 0;
                for (srcchunk = 0; srcchunk < 2; srcchunk++) {
-                       sges[srcchunk].addr = dma_addr + src_off;
-                       sges[srcchunk].length = src_len;
-                       sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
+                       sge[srcchunk].addr = dma_addr + src_off;
+                       sge[srcchunk].length = src_len;
                        num_sges++;
 
                        src_off += src_len;
@@ -343,7 +340,8 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
                        src_len = dst_len - src_len; /* remainder */
                        src_len_sum += src_len;
                }
-               rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
+               rc = smc_tx_rdma_write(conn, dst_off, num_sges,
+                                      &wr_rdma_buf->wr_tx_rdma[dstchunk]);
                if (rc)
                        return rc;
                if (dst_len_sum == len)
@@ -402,7 +400,8 @@ static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
 /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
  * usable snd_wnd as max transmit
  */
-static int smc_tx_rdma_writes(struct smc_connection *conn)
+static int smc_tx_rdma_writes(struct smc_connection *conn,
+                             struct smc_rdma_wr *wr_rdma_buf)
 {
        size_t len, src_len, dst_off, dst_len; /* current chunk values */
        union smc_host_cursor sent, prep, prod, cons;
@@ -463,7 +462,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
                                         dst_off, dst_len);
        else
                rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
-                                        dst_off, dst_len);
+                                        dst_off, dst_len, wr_rdma_buf);
        if (rc)
                return rc;
 
@@ -484,11 +483,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 {
        struct smc_cdc_producer_flags *pflags;
+       struct smc_rdma_wr *wr_rdma_buf;
        struct smc_cdc_tx_pend *pend;
        struct smc_wr_buf *wr_buf;
        int rc;
 
-       rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
+       rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);
        if (rc < 0) {
                if (rc == -EBUSY) {
                        struct smc_sock *smc =
@@ -506,7 +506,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 
        spin_lock_bh(&conn->send_lock);
        if (!conn->local_tx_ctrl.prod_flags.urg_data_present) {
-               rc = smc_tx_rdma_writes(conn);
+               rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
                if (rc) {
                        smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
                                           (struct smc_wr_tx_pend_priv *)pend);
@@ -533,7 +533,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
 
        spin_lock_bh(&conn->send_lock);
        if (!pflags->urg_data_present)
-               rc = smc_tx_rdma_writes(conn);
+               rc = smc_tx_rdma_writes(conn, NULL);
        if (!rc)
                rc = smcd_cdc_msg_send(conn);
 
index 1dc88c32d6bb7070772770e196aa27b7077e33e6..253aa75dc2b6818c1a9d4646535c51e1e59bc3f3 100644 (file)
@@ -160,6 +160,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
  * @link:              Pointer to smc_link used to later send the message.
  * @handler:           Send completion handler function pointer.
  * @wr_buf:            Out value returns pointer to message buffer.
+ * @wr_rdma_buf:       Out value returns pointer to rdma work request.
  * @wr_pend_priv:      Out value returns pointer serving as handler context.
  *
  * Return: 0 on success, or -errno on error.
@@ -167,6 +168,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
 int smc_wr_tx_get_free_slot(struct smc_link *link,
                            smc_wr_tx_handler handler,
                            struct smc_wr_buf **wr_buf,
+                           struct smc_rdma_wr **wr_rdma_buf,
                            struct smc_wr_tx_pend_priv **wr_pend_priv)
 {
        struct smc_wr_tx_pend *wr_pend;
@@ -204,6 +206,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
        wr_ib = &link->wr_tx_ibs[idx];
        wr_ib->wr_id = wr_id;
        *wr_buf = &link->wr_tx_bufs[idx];
+       if (wr_rdma_buf)
+               *wr_rdma_buf = &link->wr_tx_rdmas[idx];
        *wr_pend_priv = &wr_pend->priv;
        return 0;
 }
@@ -465,12 +469,26 @@ static void smc_wr_init_sge(struct smc_link *lnk)
                        lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
                lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
                lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
+               lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
+                       lnk->roce_pd->local_dma_lkey;
+               lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
+                       lnk->roce_pd->local_dma_lkey;
+               lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
+                       lnk->roce_pd->local_dma_lkey;
+               lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
+                       lnk->roce_pd->local_dma_lkey;
                lnk->wr_tx_ibs[i].next = NULL;
                lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
                lnk->wr_tx_ibs[i].num_sge = 1;
                lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
                lnk->wr_tx_ibs[i].send_flags =
                        IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+               lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
+               lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
+               lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
+                       lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
+               lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
+                       lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
        }
        for (i = 0; i < lnk->wr_rx_cnt; i++) {
                lnk->wr_rx_sges[i].addr =
@@ -521,8 +539,12 @@ void smc_wr_free_link_mem(struct smc_link *lnk)
        lnk->wr_tx_mask = NULL;
        kfree(lnk->wr_tx_sges);
        lnk->wr_tx_sges = NULL;
+       kfree(lnk->wr_tx_rdma_sges);
+       lnk->wr_tx_rdma_sges = NULL;
        kfree(lnk->wr_rx_sges);
        lnk->wr_rx_sges = NULL;
+       kfree(lnk->wr_tx_rdmas);
+       lnk->wr_tx_rdmas = NULL;
        kfree(lnk->wr_rx_ibs);
        lnk->wr_rx_ibs = NULL;
        kfree(lnk->wr_tx_ibs);
@@ -552,10 +574,20 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
                                  GFP_KERNEL);
        if (!link->wr_rx_ibs)
                goto no_mem_wr_tx_ibs;
+       link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
+                                   sizeof(link->wr_tx_rdmas[0]),
+                                   GFP_KERNEL);
+       if (!link->wr_tx_rdmas)
+               goto no_mem_wr_rx_ibs;
+       link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
+                                       sizeof(link->wr_tx_rdma_sges[0]),
+                                       GFP_KERNEL);
+       if (!link->wr_tx_rdma_sges)
+               goto no_mem_wr_tx_rdmas;
        link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
                                   GFP_KERNEL);
        if (!link->wr_tx_sges)
-               goto no_mem_wr_rx_ibs;
+               goto no_mem_wr_tx_rdma_sges;
        link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
                                   sizeof(link->wr_rx_sges[0]),
                                   GFP_KERNEL);
@@ -579,6 +611,10 @@ no_mem_wr_rx_sges:
        kfree(link->wr_rx_sges);
 no_mem_wr_tx_sges:
        kfree(link->wr_tx_sges);
+no_mem_wr_tx_rdma_sges:
+       kfree(link->wr_tx_rdma_sges);
+no_mem_wr_tx_rdmas:
+       kfree(link->wr_tx_rdmas);
 no_mem_wr_rx_ibs:
        kfree(link->wr_rx_ibs);
 no_mem_wr_tx_ibs:
index 1d85bb14fd6f88c455274ef13c5003fb4eb46e28..09bf32fd39596ea241c97f3cd2dcbb8f146d9604 100644 (file)
@@ -85,6 +85,7 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev);
 
 int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler,
                            struct smc_wr_buf **wr_buf,
+                           struct smc_rdma_wr **wrs,
                            struct smc_wr_tx_pend_priv **wr_pend_priv);
 int smc_wr_tx_put_slot(struct smc_link *link,
                       struct smc_wr_tx_pend_priv *wr_pend_priv);