net/mlx5e: Add RSS support for hairpin
author	Or Gerlitz <ogerlitz@mellanox.com>
	Sun, 26 Nov 2017 18:39:12 +0000 (20:39 +0200)
committer	Saeed Mahameed <saeedm@mellanox.com>
	Fri, 19 Jan 2018 20:41:32 +0000 (22:41 +0200)
Support RSS for hairpin traffic. We create multiple hairpin RQ/SQ pairs
and an RSS TTC table per hairpin instance, and steer the related flows
through that table so they are spread between the pairs.

We open one pair per 50Gbps of link speed: for all speeds <= 50Gbps
there is a single pair and no RSS, while a 100Gbps port gets two
RSSed pairs.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
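
For intuition, the "one pair per 50Gbps" rule above reduces to clamping
the link speed up to 50000 Mbps and dividing by 50000, which is what the
en_tc.c hunk below does with mlx5e_get_max_linkspeed() and do_div(). A
minimal user-space sketch (the helper name here is hypothetical):

#include <stdint.h>
#include <stdio.h>

/* One hairpin RQ/SQ pair per 50Gbps share of the port speed;
 * slower links are rounded up so there is always at least one pair. */
static uint32_t hairpin_num_channels(uint32_t link_speed_mbps)
{
	uint64_t speed = link_speed_mbps > 50000 ? link_speed_mbps : 50000;

	return (uint32_t)(speed / 50000);
}

int main(void)
{
	/* 25G -> 1 pair (no RSS), 50G -> 1 pair, 100G -> 2 RSSed pairs */
	printf("%u %u %u\n", hairpin_num_channels(25000),
	       hairpin_num_channels(50000),
	       hairpin_num_channels(100000));
	return 0;
}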

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 10620c3e0c6175733dc905994f2a9c1ba777f1fe..a3536e051052b46a9a3b6d2035e56c48a766c58c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -697,6 +697,11 @@ enum {
        MLX5E_ARFS_FT_LEVEL
 };
 
+enum {
+       MLX5E_TC_FT_LEVEL = 0,
+       MLX5E_TC_TTC_FT_LEVEL,
+};
+
 struct mlx5e_ethtool_table {
        struct mlx5_flow_table *ft;
        int                    num_rules;
@@ -1057,6 +1062,8 @@ int mlx5e_open(struct net_device *netdev);
 void mlx5e_update_stats_work(struct work_struct *work);
 u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
 
+int mlx5e_bits_invert(unsigned long a, int size);
+
 /* ethtool helpers */
 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
                               struct ethtool_drvinfo *drvinfo);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 466a4e1244d796274095fafa335777ee14d22563..8e380a89ec63abf28998e8d7868f85c8ed52e137 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2219,7 +2219,7 @@ static int mlx5e_rx_hash_fn(int hfunc)
               MLX5_RX_HASH_FN_INVERTED_XOR8;
 }
 
-static int mlx5e_bits_invert(unsigned long a, int size)
+int mlx5e_bits_invert(unsigned long a, int size)
 {
        int inv = 0;
        int i;
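
The hunk above only drops the static qualifier so en_tc.c can reuse the
helper. For reference, a self-contained sketch of the bit-order
inversion it performs, assuming it reverses the low 'size' bits of its
argument (e.g. size == 7 for a 128-entry indirection table); this
scattering is what the inverted-XOR8 hash variant expects when walking
the table:

#include <stdio.h>

/* Reverse the low 'size' bits of 'a', e.g. 0b0000001 -> 0b1000000
 * for size == 7. */
static int bits_invert(unsigned long a, int size)
{
	int inv = 0;
	int i;

	for (i = 0; i < size; i++)
		inv |= ((a >> (size - i - 1)) & 1UL) << i;
	return inv;
}

int main(void)
{
	printf("%d\n", bits_invert(1, 7)); /* prints 64 */
	return 0;
}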
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 46752a6dff9245f23bb8d082b22b2142590d96e6..7a4f577d1c5b272f2313274903336b9104c1f1ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
 #include "en_tc.h"
 #include "eswitch.h"
 #include "vxlan.h"
+#include "fs_core.h"
 
 struct mlx5_nic_flow_attr {
        u32 action;
        u32 flow_tag;
        u32 mod_hdr_id;
        u32 hairpin_tirn;
+       struct mlx5_flow_table  *hairpin_ft;
 };
 
 enum {
@@ -64,6 +66,7 @@ enum {
        MLX5E_TC_FLOW_NIC       = BIT(1),
        MLX5E_TC_FLOW_OFFLOADED = BIT(2),
        MLX5E_TC_FLOW_HAIRPIN   = BIT(3),
+       MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(4),
 };
 
 struct mlx5e_tc_flow {
@@ -100,8 +103,14 @@ struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;
 
        struct mlx5_core_dev *func_mdev;
+       struct mlx5e_priv *func_priv;
        u32 tdn;
        u32 tirn;
+
+       int num_channels;
+       struct mlx5e_rqt indir_rqt;
+       u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
+       struct mlx5e_ttc_table ttc;
 };
 
 struct mlx5e_hairpin_entry {
@@ -290,6 +299,151 @@ static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
 }
 
+static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
+{
+       u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
+       struct mlx5e_priv *priv = hp->func_priv;
+       int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
+
+       mlx5e_build_default_indir_rqt(indirection_rqt, sz,
+                                     hp->num_channels);
+
+       for (i = 0; i < sz; i++) {
+               ix = i;
+               if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
+                       ix = mlx5e_bits_invert(i, ilog2(sz));
+               ix = indirection_rqt[ix];
+               rqn = hp->pair->rqn[ix];
+               MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
+       }
+}
+
+static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
+{
+       int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
+       struct mlx5e_priv *priv = hp->func_priv;
+       struct mlx5_core_dev *mdev = priv->mdev;
+       void *rqtc;
+       u32 *in;
+
+       inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+       MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+       MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
+
+       mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
+
+       err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
+       if (!err)
+               hp->indir_rqt.enabled = true;
+
+       kvfree(in);
+       return err;
+}
+
+static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
+{
+       struct mlx5e_priv *priv = hp->func_priv;
+       u32 in[MLX5_ST_SZ_DW(create_tir_in)];
+       int tt, i, err;
+       void *tirc;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
+               tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+
+               MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
+               MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+               MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
+               mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+
+               err = mlx5_core_create_tir(hp->func_mdev, in,
+                                          MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
+               if (err) {
+                       mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
+                       goto err_destroy_tirs;
+               }
+       }
+       return 0;
+
+err_destroy_tirs:
+       for (i = 0; i < tt; i++)
+               mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
+       return err;
+}
+
+static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
+{
+       int tt;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+               mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
+}
+
+static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
+                                        struct ttc_params *ttc_params)
+{
+       struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+       int tt;
+
+       memset(ttc_params, 0, sizeof(*ttc_params));
+
+       ttc_params->any_tt_tirn = hp->tirn;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+               ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
+
+       ft_attr->max_fte = MLX5E_NUM_TT;
+       ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
+       ft_attr->prio = MLX5E_TC_PRIO;
+}
+
+static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
+{
+       struct mlx5e_priv *priv = hp->func_priv;
+       struct ttc_params ttc_params;
+       int err;
+
+       err = mlx5e_hairpin_create_indirect_rqt(hp);
+       if (err)
+               return err;
+
+       err = mlx5e_hairpin_create_indirect_tirs(hp);
+       if (err)
+               goto err_create_indirect_tirs;
+
+       mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
+       err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
+       if (err)
+               goto err_create_ttc_table;
+
+       netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
+                  hp->num_channels, hp->ttc.ft.t->id);
+
+       return 0;
+
+err_create_ttc_table:
+       mlx5e_hairpin_destroy_indirect_tirs(hp);
+err_create_indirect_tirs:
+       mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+
+       return err;
+}
+
+static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
+{
+       struct mlx5e_priv *priv = hp->func_priv;
+
+       mlx5e_destroy_ttc_table(priv, &hp->ttc);
+       mlx5e_hairpin_destroy_indirect_tirs(hp);
+       mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+}
+
 static struct mlx5e_hairpin *
 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
                     int peer_ifindex)
@@ -313,13 +467,23 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params
        }
        hp->pair = pair;
        hp->func_mdev = func_mdev;
+       hp->func_priv = priv;
+       hp->num_channels = params->num_channels;
 
        err = mlx5e_hairpin_create_transport(hp);
        if (err)
                goto create_transport_err;
 
+       if (hp->num_channels > 1) {
+               err = mlx5e_hairpin_rss_init(hp);
+               if (err)
+                       goto rss_init_err;
+       }
+
        return hp;
 
+rss_init_err:
+       mlx5e_hairpin_destroy_transport(hp);
 create_transport_err:
        mlx5_core_hairpin_destroy(hp->pair);
 create_pair_err:
@@ -329,6 +493,8 @@ create_pair_err:
 
 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
 {
+       if (hp->num_channels > 1)
+               mlx5e_hairpin_rss_cleanup(hp);
        mlx5e_hairpin_destroy_transport(hp);
        mlx5_core_hairpin_destroy(hp->pair);
        kvfree(hp);
@@ -400,6 +566,8 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
        struct mlx5_core_dev *peer_mdev;
        struct mlx5e_hairpin_entry *hpe;
        struct mlx5e_hairpin *hp;
+       u64 link_speed64;
+       u32 link_speed;
        u8 match_prio;
        u16 peer_id;
        int err;
@@ -433,6 +601,13 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                                     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
        params.q_counter = priv->q_counter;
 
+       /* set hairpin pair per each 50Gbs share of the link */
+       mlx5e_get_max_linkspeed(priv->mdev, &link_speed);
+       link_speed = max_t(u32, link_speed, 50000);
+       link_speed64 = link_speed;
+       do_div(link_speed64, 50000);
+       params.num_channels = link_speed64;
+
        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
@@ -448,8 +623,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                 hash_hairpin_info(peer_id, match_prio));
 
 attach_flow:
-       flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
+       if (hpe->hp->num_channels > 1) {
+               flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
+               flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+       } else {
+               flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
+       }
        list_add(&flow->hairpin, &hpe->flows);
+
        return 0;
 
 create_hairpin_err:
@@ -497,20 +678,24 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
        bool table_created = false;
        int err, dest_ix = 0;
 
-       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
-               if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
-                       err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
-                       if (err) {
-                               rule = ERR_PTR(err);
-                               goto err_add_hairpin_flow;
-                       }
+       if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
+               err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
+               if (err) {
+                       rule = ERR_PTR(err);
+                       goto err_add_hairpin_flow;
+               }
+               if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
+                       dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+                       dest[dest_ix].ft = attr->hairpin_ft;
+               } else {
                        dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                        dest[dest_ix].tir_num = attr->hairpin_tirn;
-               } else {
-                       dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-                       dest[dest_ix].ft = priv->fs.vlan.ft.t;
                }
                dest_ix++;
+       } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+               dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+               dest[dest_ix].ft = priv->fs.vlan.ft.t;
+               dest_ix++;
        }
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
@@ -551,7 +736,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                                                            MLX5E_TC_PRIO,
                                                            tc_tbl_size,
                                                            MLX5E_TC_TABLE_NUM_GROUPS,
-                                                           0, 0);
+                                                           MLX5E_TC_FT_LEVEL, 0);
                if (IS_ERR(priv->fs.tc.t)) {
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");