net/mlx4_en: Align tx path structures to cache lines
author: Eric Dumazet <edumazet@google.com>
Sun, 5 Oct 2014 09:35:10 +0000 (12:35 +0300)
committer: David S. Miller <davem@davemloft.net>
Mon, 6 Oct 2014 05:04:15 +0000 (01:04 -0400)
Reorganize struct mlx4_en_tx_ring to have:
- One cache line containing last_nr_txbb & cons & wake_queue, used by tx
  completion.
- One cache line containing fields dirtied by mlx4_en_xmit()
- The remaining fields, which are read-mostly and shared by CPUs.

Align struct mlx4_en_tx_info to a cache line

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

index e54b653de3d41b23c916f7d9da956d227176d90a..b7bde95e05fe1ed16fcb0f1d6616669d9b435703 100644 (file)
@@ -216,13 +216,13 @@ enum cq_type {
 
 struct mlx4_en_tx_info {
        struct sk_buff *skb;
-       u32 nr_txbb;
-       u32 nr_bytes;
-       u8 linear;
-       u8 data_offset;
-       u8 inl;
-       u8 ts_requested;
-};
+       u32             nr_txbb;
+       u32             nr_bytes;
+       u8              linear;
+       u8              data_offset;
+       u8              inl;
+       u8              ts_requested;
+} ____cacheline_aligned_in_smp;
 
 
 #define MLX4_EN_BIT_DESC_OWN   0x80000000
@@ -253,40 +253,46 @@ struct mlx4_en_rx_alloc {
 };
 
 struct mlx4_en_tx_ring {
+       /* cache line used and dirtied in tx completion
+        * (mlx4_en_free_tx_buf())
+        */
+       u32                     last_nr_txbb;
+       u32                     cons;
+       unsigned long           wake_queue;
+
+       /* cache line used and dirtied in mlx4_en_xmit() */
+       u32                     prod ____cacheline_aligned_in_smp;
+       unsigned long           bytes;
+       unsigned long           packets;
+       unsigned long           tx_csum;
+       unsigned long           tso_packets;
+       unsigned long           xmit_more;
+       struct mlx4_bf          bf;
+       unsigned long           queue_stopped;
+
+       /* Following part should be mostly read */
+       cpumask_t               affinity_mask;
+       struct mlx4_qp          qp;
        struct mlx4_hwq_resources wqres;
-       u32 size ; /* number of TXBBs */
-       u32 size_mask;
-       u16 stride;
-       u16 cqn;        /* index of port CQ associated with this ring */
-       u32 prod;
-       u32 cons;
-       u32 buf_size;
-       u32 doorbell_qpn;
-       void *buf;
-       struct mlx4_en_tx_info *tx_info;
-       u8 *bounce_buf;
-       u8 queue_index;
-       cpumask_t affinity_mask;
-       u32 last_nr_txbb;
-       struct mlx4_qp qp;
-       struct mlx4_qp_context context;
-       int qpn;
-       enum mlx4_qp_state qp_state;
-       struct mlx4_srq dummy;
-       unsigned long bytes;
-       unsigned long packets;
-       unsigned long tx_csum;
-       unsigned long queue_stopped;
-       unsigned long wake_queue;
-       unsigned long tso_packets;
-       unsigned long xmit_more;
-       struct mlx4_bf bf;
-       bool bf_enabled;
-       bool bf_alloced;
-       struct netdev_queue *tx_queue;
-       int hwtstamp_tx_type;
-       int inline_thold;
-};
+       u32                     size; /* number of TXBBs */
+       u32                     size_mask;
+       u16                     stride;
+       u16                     cqn;    /* index of port CQ associated with this ring */
+       u32                     buf_size;
+       u32                     doorbell_qpn;
+       void                    *buf;
+       struct mlx4_en_tx_info  *tx_info;
+       u8                      *bounce_buf;
+       struct mlx4_qp_context  context;
+       int                     qpn;
+       enum mlx4_qp_state      qp_state;
+       u8                      queue_index;
+       bool                    bf_enabled;
+       bool                    bf_alloced;
+       struct netdev_queue     *tx_queue;
+       int                     hwtstamp_tx_type;
+       int                     inline_thold;
+} ____cacheline_aligned_in_smp;
 
 struct mlx4_en_rx_desc {
        /* actual number of entries depends on rx ring stride */