From: Yevgeny Petrilin Date: Tue, 6 Mar 2012 04:03:34 +0000 (+0000) Subject: net/mlx4_en: Saving mem access on data path X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=ebf8c9aa032f03343b91c91951b0705021b02eb0;p=openwrt%2Fstaging%2Fblogic.git net/mlx4_en: Saving mem access on data path Localized the pdev->dev, and using dma_map instead of pci_map There are multiple map/unmap operations on data path, optimizing those by saving redundant pointer access. Those places were identified as hot-spots when running kernel profiling during some benchmarks. The fixes had most impact when testing packet rate with small packets, reducing several % from CPU load, and in some case being the difference between reaching wire speed or being CPU bound. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 9fe4f94c6da7..31b455a49273 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1062,6 +1062,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, memset(priv, 0, sizeof(struct mlx4_en_priv)); priv->dev = dev; priv->mdev = mdev; + priv->ddev = &mdev->pdev->dev; priv->prof = prof; priv->port = port; priv->port_up = false; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index d703ef2c9c91..c881712b7492 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -48,7 +48,6 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc *ring_alloc, int i) { - struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i]; struct page *page; @@ -72,7 +71,7 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, skb_frags[i].offset = page_alloc->offset; page_alloc->offset += frag_info->frag_stride; } - dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) + + dma = dma_map_single(priv->ddev, page_address(skb_frags[i].page) + skb_frags[i].offset, frag_info->frag_size, PCI_DMA_FROMDEVICE); rx_desc->data[i].addr = cpu_to_be64(dma); @@ -186,7 +185,6 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { - struct mlx4_en_dev *mdev = priv->mdev; struct page_frag *skb_frags; struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride); dma_addr_t dma; @@ -198,7 +196,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, dma = be64_to_cpu(rx_desc->data[nr].addr); en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma); - pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, + dma_unmap_single(priv->ddev, dma, skb_frags[nr].size, PCI_DMA_FROMDEVICE); put_page(skb_frags[nr].page); } @@ -412,7 +410,6 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, int length) { struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags; - struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_frag_info *frag_info; int nr; dma_addr_t dma; @@ -435,7 +432,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, goto fail; /* Unmap buffer */ - pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]), + dma_unmap_single(priv->ddev, dma, skb_frag_size(&skb_frags_rx[nr]), PCI_DMA_FROMDEVICE); } /* Adjust size of last fragment to match actual length */ @@ -461,7 +458,6 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc *page_alloc, unsigned int length) { - struct mlx4_en_dev *mdev = priv->mdev; struct sk_buff *skb; void *va; int used_frags; @@ -483,10 +479,10 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, /* We are copying all relevant data to the skb - temporarily * synch buffers for the copy */ dma = be64_to_cpu(rx_desc->data[0].addr); - dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length, + dma_sync_single_for_cpu(priv->ddev, dma, length, DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, va, length); - dma_sync_single_for_device(&mdev->pdev->dev, dma, length, + dma_sync_single_for_device(priv->ddev, dma, length, DMA_FROM_DEVICE); skb->tail += length; } else { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 50b3fa5212ed..008f0af5cc8b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -198,7 +198,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u8 owner) { - struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; @@ -214,7 +213,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { if (!tx_info->inl) { if (tx_info->linear) { - pci_unmap_single(mdev->pdev, + dma_unmap_single(priv->ddev, (dma_addr_t) be64_to_cpu(data->addr), be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); @@ -223,7 +222,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, for (i = 0; i < frags; i++) { frag = &skb_shinfo(skb)->frags[i]; - pci_unmap_page(mdev->pdev, + dma_unmap_page(priv->ddev, (dma_addr_t) be64_to_cpu(data[i].addr), skb_frag_size(frag), PCI_DMA_TODEVICE); } @@ -241,7 +240,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, } if (tx_info->linear) { - pci_unmap_single(mdev->pdev, + dma_unmap_single(priv->ddev, (dma_addr_t) be64_to_cpu(data->addr), be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); @@ -253,7 +252,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, if ((void *) data >= end) data = ring->buf; frag = &skb_shinfo(skb)->frags[i]; - pci_unmap_page(mdev->pdev, + dma_unmap_page(priv->ddev, (dma_addr_t) be64_to_cpu(data->addr), skb_frag_size(frag), PCI_DMA_TODEVICE); ++data; @@ -733,7 +732,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* Map fragments */ for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) { frag = &skb_shinfo(skb)->frags[i]; - dma = skb_frag_dma_map(&mdev->dev->pdev->dev, frag, + dma = skb_frag_dma_map(priv->ddev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); data->addr = cpu_to_be64(dma); @@ -745,7 +744,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* Map linear part */ if (tx_info->linear) { - dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size, + dma = dma_map_single(priv->ddev, skb->data + lso_header_size, skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE); data->addr = cpu_to_be64(dma); data->lkey = cpu_to_be32(mdev->mr.key); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d60335f3c473..a5bb0b7d339a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -482,6 +482,7 @@ struct mlx4_en_priv { struct mlx4_en_stat_out_mbox hw_stats; int vids[128]; bool wol; + struct device *ddev; }; enum mlx4_en_wol {