net/mlx5e: Expand WQE stride when CQE compression is enabled
author	Tariq Toukan <tariqt@mellanox.com>
Tue, 10 May 2016 21:29:15 +0000 (00:29 +0300)
committer	David S. Miller <davem@davemloft.net>
Wed, 11 May 2016 23:42:39 +0000 (19:42 -0400)
Make the MPWQE/Striding RQ default configuration dynamic rather than
statically set at compile time.  Now, at driver load, we set the
stride size and the number of strides dynamically.

By default we use the same values as before, but when CQE compression
is enabled we use a larger stride size, so that larger packets also
benefit from CQE compression.
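To illustrate the resulting geometry (arithmetic sketch only, not part
of the patch): the WQE size stays fixed at 2^17 bytes (128KB), so

	log_stride_sz = 6 -> 64B strides,  log_num_strides = 17 - 6 = 11 (2048 strides)
	log_stride_sz = 8 -> 256B strides, log_num_strides = 17 - 8 = 9  (512 strides)

Both configurations respect the HW restrictions (log stride size >= 6,
log number of strides >= 9).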

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 19f0d8db27ce60815e58e78b9a343ee6b09ad5f2..e05abad50c7b5288e7d7ea65aad8f57d845350d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x4
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW            0x6
 
-#define MLX5_MPWRQ_LOG_NUM_STRIDES             11 /* >= 9, HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE             6  /* >= 6, HW restriction */
-#define MLX5_MPWRQ_NUM_STRIDES                 BIT(MLX5_MPWRQ_LOG_NUM_STRIDES)
-#define MLX5_MPWRQ_STRIDE_SIZE                 BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE)
-#define MLX5_MPWRQ_LOG_WQE_SZ                  (MLX5_MPWRQ_LOG_NUM_STRIDES +\
-                                                MLX5_MPWRQ_LOG_STRIDE_SIZE)
+#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS        8  /* >= 6, HW restriction */
+#define MLX5_MPWRQ_LOG_WQE_SZ                  17
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
                                    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
 #define MLX5_MPWRQ_PAGES_PER_WQE               BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
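As a sanity check of the page-order math above, here is a standalone
sketch (it assumes 4KB pages, i.e. PAGE_SHIFT == 12, and redefines
BIT() locally so it builds outside the kernel):

	#include <stdio.h>

	#define PAGE_SHIFT 12 /* assumption: 4KB pages */
	#define BIT(n) (1UL << (n))
	#define MLX5_MPWRQ_LOG_WQE_SZ 17
	#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
					   MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)

	int main(void)
	{
		/* 17 - 12 = 5, so BIT(5) = 32 pages back each 128KB WQE */
		printf("page order %d, pages per WQE %lu\n",
		       MLX5_MPWRQ_WQE_PAGE_ORDER, BIT(MLX5_MPWRQ_WQE_PAGE_ORDER));
		return 0;
	}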
@@ -154,6 +151,8 @@ struct mlx5e_umr_wqe {
 struct mlx5e_params {
        u8  log_sq_size;
        u8  rq_wq_type;
+       u8  mpwqe_log_stride_sz;
+       u8  mpwqe_log_num_strides;
        u8  log_rq_size;
        u16 num_channels;
        u8  num_tc;
@@ -249,6 +248,8 @@ struct mlx5e_rq {
        /* control */
        struct mlx5_wq_ctrl    wq_ctrl;
        u8                     wq_type;
+       u32                    mpwqe_stride_sz;
+       u32                    mpwqe_num_strides;
        u32                    rqn;
        struct mlx5e_channel  *channel;
        struct mlx5e_priv     *priv;
@@ -272,7 +273,7 @@ struct mlx5e_mpw_info {
        void (*dma_pre_sync)(struct device *pdev,
                             struct mlx5e_mpw_info *wi,
                             u32 wqe_offset, u32 len);
-       void (*add_skb_frag)(struct device *pdev,
+       void (*add_skb_frag)(struct mlx5e_rq *rq,
                             struct sk_buff *skb,
                             struct mlx5e_mpw_info *wi,
                             u32 page_idx, u32 frag_offset, u32 len);
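Note that add_skb_frag now receives the RQ itself rather than a bare
struct device pointer: the truesize alignment it performs depends on
the per-RQ runtime stride size, and the device pointer remains
reachable through rq->pdev (see the en_rx.c changes below).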
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0ea4c03a794674710e384edf89d6d68b1083c8a3..e40e59a0ff381e1d866e0d3d647d7ba2a9b66321 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -307,7 +307,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
                rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
 
-               rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE;
+               rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
+               rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
+               rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
                byte_count = rq->wqe_sz;
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
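Either way the product is unchanged: mpwqe_stride_sz * mpwqe_num_strides
= 2^17 = 128KB per WQE, matching MLX5_MPWRQ_LOG_WQE_SZ above.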
@@ -1130,9 +1132,9 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
        switch (priv->params.rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                MLX5_SET(wq, wq, log_wqe_num_of_strides,
-                        MLX5_MPWRQ_LOG_NUM_STRIDES - 9);
+                        priv->params.mpwqe_log_num_strides - 9);
                MLX5_SET(wq, wq, log_wqe_stride_size,
-                        MLX5_MPWRQ_LOG_STRIDE_SIZE - 6);
+                        priv->params.mpwqe_log_stride_sz - 6);
                MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
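The -9 and -6 offsets reflect the device encoding: log_wqe_num_of_strides
and log_wqe_stride_size are programmed relative to the HW minimums (2^9
strides, 2^6-byte stride) noted in the en.h comments above.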
@@ -1199,7 +1201,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
        switch (priv->params.rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                log_cq_size = priv->params.log_rq_size +
-                       MLX5_MPWRQ_LOG_NUM_STRIDES;
+                       priv->params.mpwqe_log_num_strides;
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                log_cq_size = priv->params.log_rq_size;
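With the default MPWQE RQ size (log 4, see en.h) this works out to
log_cq_size = 4 + 11 = 15 (32K CQEs) for the default stride, and to
4 + 9 = 13 (8K CQEs) with the larger CQE-compression stride (an
illustrative calculation, not code from the patch).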
@@ -2729,12 +2731,25 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
        switch (priv->params.rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+               priv->params.mpwqe_log_stride_sz =
+                       priv->params.rx_cqe_compress ?
+                       MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
+                       MLX5_MPWRQ_LOG_STRIDE_SIZE;
+               priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
+                       priv->params.mpwqe_log_stride_sz;
                priv->params.lro_en = true;
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
        }
 
+       mlx5_core_info(mdev,
+                      "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+                      priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+                      BIT(priv->params.log_rq_size),
+                      BIT(priv->params.mpwqe_log_stride_sz),
+                      priv->params.rx_cqe_compress_admin);
+
        priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
                                            BIT(priv->params.log_rq_size));
        priv->params.rx_cq_moderation_usec =
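With Striding RQ and CQE compression both enabled, the info line above
would log something like "MLX5E: StrdRq(1) RqSz(16) StrdSz(256)
RxCqeCmprss(1)" (illustrative output derived from the defaults above).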
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index c8b8d456268f7a796ace7846f0d85a2438e5a9ba..f3456798c5960fda619a2a57074aa9aaf902e1ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -127,7 +127,7 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
 
        for (i = update_owner_only; i < cqe_count;
             i++, cq->mini_arr_idx++, cqcc++) {
-               if (unlikely(cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE))
+               if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
                        mlx5e_read_mini_arr_slot(cq, cqcc);
 
                mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
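(The unlikely() hint is dropped because the mini CQE array is refilled
every MLX5_MINI_CQE_ARRAY_SIZE iterations of this loop, so the branch
is not actually a rare one.)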
@@ -212,6 +212,11 @@ err_free_skb:
        return -ENOMEM;
 }
 
+static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
+{
+       return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
+}
+
 static inline void
 mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev,
                                struct mlx5e_mpw_info *wi,
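A quick sketch of what the new helper evaluates to under the two
configurations (assuming 4KB pages, hence page order 5):

	/* default:    2048 strides >> 5 = 64 strides per page */
	/* compressed:  512 strides >> 5 = 16 strides per page */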
@@ -230,13 +235,13 @@ mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev,
 }
 
 static inline void
-mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq,
                                struct sk_buff *skb,
                                struct mlx5e_mpw_info *wi,
                                u32 page_idx, u32 frag_offset,
                                u32 len)
 {
-       unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+       unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);
 
        wi->skbs_frags[page_idx]++;
        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
@@ -245,15 +250,15 @@ mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
 }
 
 static inline void
-mlx5e_add_skb_frag_fragmented_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq,
                                    struct sk_buff *skb,
                                    struct mlx5e_mpw_info *wi,
                                    u32 page_idx, u32 frag_offset,
                                    u32 len)
 {
-       unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+       unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);
 
-       dma_sync_single_for_cpu(pdev,
+       dma_sync_single_for_cpu(rq->pdev,
                                wi->umr.dma_info[page_idx].addr + frag_offset,
                                len, DMA_FROM_DEVICE);
        wi->skbs_frags[page_idx]++;
@@ -293,7 +298,6 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
        skb_copy_to_linear_data_offset(skb, 0,
                                       page_address(dma_info->page) + offset,
                                       len);
-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
        if (unlikely(offset + headlen > PAGE_SIZE)) {
                dma_info++;
                headlen_pg = len;
@@ -304,7 +308,6 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
                                               page_address(dma_info->page),
                                               len);
        }
-#endif
 }
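The compile-time #if guard around the spill-over copy is gone: with the
stride size now a runtime value, whether the linear head can cross a
page boundary can no longer be decided at build time, so the check is
always compiled in (the matching #if in mlx5e_mpwqe_fill_rx_skb below
is removed for the same reason).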
 
 static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix)
@@ -430,7 +433,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
                if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i)))
                        goto err_unmap;
-               atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+               atomic_add(mlx5e_mpwqe_strides_per_page(rq),
                           &wi->umr.dma_info[i].page->_count);
                wi->skbs_frags[i] = 0;
        }
@@ -449,7 +452,7 @@ err_unmap:
        while (--i >= 0) {
                dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
                               PCI_DMA_FROMDEVICE);
-               atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE,
+               atomic_sub(mlx5e_mpwqe_strides_per_page(rq),
                           &wi->umr.dma_info[i].page->_count);
                put_page(wi->umr.dma_info[i].page);
        }
@@ -474,7 +477,7 @@ void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
                dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
                               PCI_DMA_FROMDEVICE);
-               atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+               atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
                           &wi->umr.dma_info[i].page->_count);
                put_page(wi->umr.dma_info[i].page);
        }
@@ -524,7 +527,7 @@ static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq,
         */
        split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER);
        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-               atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+               atomic_add(mlx5e_mpwqe_strides_per_page(rq),
                           &wi->dma_info.page[i]._count);
                wi->skbs_frags[i] = 0;
        }
@@ -548,7 +551,7 @@ void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
        dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz,
                       PCI_DMA_FROMDEVICE);
        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-               atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+               atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
                           &wi->dma_info.page[i]._count);
                put_page(&wi->dma_info.page[i]);
        }
@@ -793,9 +796,9 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
                                           u32 cqe_bcnt,
                                           struct sk_buff *skb)
 {
-       u32 consumed_bytes = ALIGN(cqe_bcnt, MLX5_MPWRQ_STRIDE_SIZE);
+       u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz);
        u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
-       u32 wqe_offset     = stride_ix * MLX5_MPWRQ_STRIDE_SIZE;
+       u32 wqe_offset     = stride_ix * rq->mpwqe_stride_sz;
        u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
        u32 page_idx       = wqe_offset >> PAGE_SHIFT;
        u32 head_page_idx  = page_idx;
@@ -803,19 +806,17 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
        u32 frag_offset    = head_offset + headlen;
        u16 byte_cnt       = cqe_bcnt - headlen;
 
-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
        if (unlikely(frag_offset >= PAGE_SIZE)) {
                page_idx++;
                frag_offset -= PAGE_SIZE;
        }
-#endif
        wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes);
 
        while (byte_cnt) {
                u32 pg_consumed_bytes =
                        min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
 
-               wi->add_skb_frag(rq->pdev, skb, wi, page_idx, frag_offset,
+               wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset,
                                 pg_consumed_bytes);
                byte_cnt -= pg_consumed_bytes;
                frag_offset = 0;
@@ -865,7 +866,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 
 mpwrq_cqe_out:
-       if (likely(wi->consumed_strides < MLX5_MPWRQ_NUM_STRIDES))
+       if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
                return;
 
        wi->free_wqe(rq, wi);