IB/mlx5: Add steering SW ICM device memory type
Author:     Ariel Levkovich <lariel@mellanox.com>
AuthorDate: Sun, 5 May 2019 14:07:13 +0000 (17:07 +0300)
Committer:  Jason Gunthorpe <jgg@mellanox.com>
CommitDate: Mon, 6 May 2019 15:51:51 +0000 (12:51 -0300)
This patch adds support for allocating, deallocating and registering a new
device memory type, STEERING_SW_ICM.  This memory can be allocated and
used by a privileged user for direct rule insertion and management of the
device's steering tables.

The type is provided by the user via the dedicated attribute in the
alloc_dm ioctl command.

Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/mlx5/cmd.c
drivers/infiniband/hw/mlx5/cmd.h
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
include/uapi/rdma/mlx5_user_ioctl_verbs.h

index f0e9c760908357d956ab603b9c888e8231e1f9d4..e3ec79b8f7f507936d170e22c10aa92020c5aed7 100644 (file)
@@ -157,7 +157,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
        return -ENOMEM;
 }
 
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length)
+int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
 {
        struct mlx5_core_dev *dev = dm->dev;
        u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
@@ -186,6 +186,131 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length)
        return err;
 }
 
+int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
+                         u16 uid, phys_addr_t *addr, u32 *obj_id)
+{
+       struct mlx5_core_dev *dev = dm->dev;
+       u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+       u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+       u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
+       unsigned long *block_map;
+       u64 icm_start_addr;
+       u32 log_icm_size;
+       u32 max_blocks;
+       u64 block_idx;
+       void *sw_icm;
+       int ret;
+
+       MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+                MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+       MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+       MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+       switch (type) {
+       case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+               icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+                                               steering_sw_icm_start_address);
+               log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
+               block_map = dm->steering_sw_icm_alloc_blocks;
+               break;
+       case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+               icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+                                       header_modify_sw_icm_start_address);
+               log_icm_size = MLX5_CAP_DEV_MEM(dev,
+                                               log_header_modify_sw_icm_size);
+               block_map = dm->header_modify_sw_icm_alloc_blocks;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+       spin_lock(&dm->lock);
+       block_idx = bitmap_find_next_zero_area(block_map,
+                                              max_blocks,
+                                              0,
+                                              num_blocks, 0);
+
+       if (block_idx < max_blocks)
+               bitmap_set(block_map,
+                          block_idx, num_blocks);
+
+       spin_unlock(&dm->lock);
+
+       if (block_idx >= max_blocks)
+               return -ENOMEM;
+
+       sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
+       icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+       MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
+                  icm_start_addr);
+       MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
+
+       ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (ret) {
+               spin_lock(&dm->lock);
+               bitmap_clear(block_map,
+                            block_idx, num_blocks);
+               spin_unlock(&dm->lock);
+
+               return ret;
+       }
+
+       *addr = icm_start_addr;
+       *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+       return 0;
+}
+
+int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
+                           u16 uid, phys_addr_t addr, u32 obj_id)
+{
+       struct mlx5_core_dev *dev = dm->dev;
+       u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+       u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+       u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+       unsigned long *block_map;
+       u64 start_idx;
+       int err;
+
+       switch (type) {
+       case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+               start_idx =
+                       (addr - MLX5_CAP64_DEV_MEM(
+                                       dev, steering_sw_icm_start_address)) >>
+                       MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+               block_map = dm->steering_sw_icm_alloc_blocks;
+               break;
+       case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+               start_idx =
+                       (addr -
+                        MLX5_CAP64_DEV_MEM(
+                                dev, header_modify_sw_icm_start_address)) >>
+                       MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+               block_map = dm->header_modify_sw_icm_alloc_blocks;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+                MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+       MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+       MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+       MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+       err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (err)
+               return err;
+
+       spin_lock(&dm->lock);
+       bitmap_clear(block_map,
+                    start_idx, num_blocks);
+       spin_unlock(&dm->lock);
+
+       return 0;
+}
+
 int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
 {
        u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
index 80a644bea6c7417606f226a8054ea9007c4c980a..0572dcba6eaed0affd8727b2ef52e6b81b3722a5 100644 (file)
@@ -46,7 +46,7 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
                                void *in, int in_size);
 int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
                         u64 length, u32 alignment);
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length);
+int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
 void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
 void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
 void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
@@ -65,4 +65,8 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
                             u16 uid);
 int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
                     u16 opmod, u8 port);
+int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
+                         u16 uid, phys_addr_t *addr, u32 *obj_id);
+int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
+                           u16 uid, phys_addr_t addr, u32 obj_id);
 #endif /* MLX5_IB_CMD_H */
index 803aea335953c0681385fd027d09a5f361d14aca..f9def2dfdba301397a9a99c0d3c466798cb98e9b 100644 (file)
@@ -2264,6 +2264,28 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
        return 0;
 }
 
+static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
+                                       u32 type)
+{
+       switch (type) {
+       case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+               if (!MLX5_CAP_DEV_MEM(dev->mdev, memic))
+                       return -EOPNOTSUPP;
+               break;
+       case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+               if (!capable(CAP_SYS_RAWIO) ||
+                   !capable(CAP_NET_RAW))
+                       return -EPERM;
+
+               if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+                     MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner)))
+                       return -EOPNOTSUPP;
+               break;
+       }
+
+       return 0;
+}
+
 static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
                                 struct mlx5_ib_dm *dm,
                                 struct ib_dm_alloc_attr *attr,
@@ -2309,6 +2331,40 @@ err_dealloc:
        return err;
 }
 
+static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
+                                 struct mlx5_ib_dm *dm,
+                                 struct ib_dm_alloc_attr *attr,
+                                 struct uverbs_attr_bundle *attrs,
+                                 int type)
+{
+       struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
+       u64 act_size;
+       int err;
+
+       /* Allocation size must be a multiple of the basic block size
+        * and a power of 2.
+        */
+       act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
+       act_size = roundup_pow_of_two(act_size);
+
+       dm->size = act_size;
+       err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size,
+                                   to_mucontext(ctx)->devx_uid, &dm->dev_addr,
+                                   &dm->icm_dm.obj_id);
+       if (err)
+               return err;
+
+       err = uverbs_copy_to(attrs,
+                            MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+                            &dm->dev_addr, sizeof(dm->dev_addr));
+       if (err)
+               mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size,
+                                       to_mucontext(ctx)->devx_uid,
+                                       dm->dev_addr, dm->icm_dm.obj_id);
+
+       return err;
+}
+
 struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
                               struct ib_ucontext *context,
                               struct ib_dm_alloc_attr *attr,
@@ -2327,6 +2383,10 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
        mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
                    type, attr->length, attr->alignment);
 
+       err = check_dm_type_support(to_mdev(ibdev), type);
+       if (err)
+               return ERR_PTR(err);
+
        dm = kzalloc(sizeof(*dm), GFP_KERNEL);
        if (!dm)
                return ERR_PTR(-ENOMEM);
@@ -2339,6 +2399,10 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
                                            attr,
                                            attrs);
                break;
+       case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+       case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+               err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type);
+               break;
        default:
                err = -EOPNOTSUPP;
        }
@@ -2355,6 +2419,8 @@ err_free:
 
 int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
 {
+       struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
+               &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
        struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
        struct mlx5_ib_dm *dm = to_mdm(ibdm);
        u32 page_idx;
@@ -2371,11 +2437,16 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
                            MLX5_CAP64_DEV_MEM(dm_db->dev,
                                               memic_bar_start_addr)) >>
                           PAGE_SHIFT;
-               bitmap_clear(rdma_udata_to_drv_context(&attrs->driver_udata,
-                                                      struct mlx5_ib_ucontext,
-                                                      ibucontext)
-                                    ->dm_pages,
-                            page_idx, DIV_ROUND_UP(dm->size, PAGE_SIZE));
+               bitmap_clear(ctx->dm_pages, page_idx,
+                            DIV_ROUND_UP(dm->size, PAGE_SIZE));
+               break;
+       case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+       case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+               ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size,
+                                             ctx->devx_uid, dm->dev_addr,
+                                             dm->icm_dm.obj_id);
+               if (ret)
+                       return ret;
                break;
        default:
                return -EOPNOTSUPP;
@@ -5902,6 +5973,8 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
 
 static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
+       struct mlx5_core_dev *mdev = dev->mdev;
+
        mlx5_ib_cleanup_multiport_master(dev);
        if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
                srcu_barrier(&dev->mr_srcu);
@@ -5909,11 +5982,29 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
        }
 
        WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
+
+       WARN_ON(dev->dm.steering_sw_icm_alloc_blocks &&
+               !bitmap_empty(
+                       dev->dm.steering_sw_icm_alloc_blocks,
+                       BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) -
+                           MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
+
+       kfree(dev->dm.steering_sw_icm_alloc_blocks);
+
+       WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks &&
+               !bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks,
+                             BIT(MLX5_CAP_DEV_MEM(
+                                         mdev, log_header_modify_sw_icm_size) -
+                                 MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
+
+       kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
 }
 
 static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
+       u64 header_modify_icm_blocks = 0;
+       u64 steering_icm_blocks = 0;
        int err;
        int i;
 
@@ -5959,16 +6050,51 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
        INIT_LIST_HEAD(&dev->qp_list);
        spin_lock_init(&dev->reset_flow_resource_lock);
 
+       if (MLX5_CAP_GEN_64(mdev, general_obj_types) &
+           MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) {
+               if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) {
+                       steering_icm_blocks =
+                               BIT(MLX5_CAP_DEV_MEM(mdev,
+                                                    log_steering_sw_icm_size) -
+                                   MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
+
+                       dev->dm.steering_sw_icm_alloc_blocks =
+                               kcalloc(BITS_TO_LONGS(steering_icm_blocks),
+                                       sizeof(unsigned long), GFP_KERNEL);
+                       if (!dev->dm.steering_sw_icm_alloc_blocks)
+                               goto err_mp;
+               }
+
+               if (MLX5_CAP64_DEV_MEM(mdev,
+                                      header_modify_sw_icm_start_address)) {
+                       header_modify_icm_blocks = BIT(
+                               MLX5_CAP_DEV_MEM(
+                                       mdev, log_header_modify_sw_icm_size) -
+                               MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
+
+                       dev->dm.header_modify_sw_icm_alloc_blocks =
+                               kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
+                                       sizeof(unsigned long), GFP_KERNEL);
+                       if (!dev->dm.header_modify_sw_icm_alloc_blocks)
+                               goto err_dm;
+               }
+       }
+
        spin_lock_init(&dev->dm.lock);
        dev->dm.dev = mdev;
 
        if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
                err = init_srcu_struct(&dev->mr_srcu);
                if (err)
-                       goto err_mp;
+                       goto err_dm;
        }
 
        return 0;
+
+err_dm:
+       kfree(dev->dm.steering_sw_icm_alloc_blocks);
+       kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
+
 err_mp:
        mlx5_ib_cleanup_multiport_master(dev);
 
@@ -6151,7 +6277,9 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
                ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
        }
 
-       if (MLX5_CAP_DEV_MEM(mdev, memic))
+       if (MLX5_CAP_DEV_MEM(mdev, memic) ||
+           MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+           MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
                ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
 
        if (mlx5_accel_ipsec_device_caps(dev->mdev) &
index 714c360dc9fb3726b4902f963289f82c9114cccc..40eb8be482e455dde33761523a6b0848c2e7b928 100644 (file)
@@ -118,6 +118,10 @@ enum {
        MLX5_MEMIC_BASE_SIZE    = 1 << MLX5_MEMIC_BASE_ALIGN,
 };
 
+#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)                                        \
+       (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
+#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+
 struct mlx5_ib_ucontext {
        struct ib_ucontext      ibucontext;
        struct list_head        db_page_list;
@@ -557,6 +561,12 @@ struct mlx5_ib_dm {
        phys_addr_t             dev_addr;
        u32                     type;
        size_t                  size;
+       union {
+               struct {
+                       u32     obj_id;
+               } icm_dm;
+               /* other dm types specific params should be added here */
+       };
 };
 
 #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -567,6 +577,11 @@ struct mlx5_ib_dm {
                                         IB_ACCESS_REMOTE_ATOMIC |\
                                         IB_ZERO_BASED)
 
+#define MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE   |\
+                                         IB_ACCESS_REMOTE_WRITE  |\
+                                         IB_ACCESS_REMOTE_READ   |\
+                                         IB_ZERO_BASED)
+
 struct mlx5_ib_mr {
        struct ib_mr            ibmr;
        void                    *descs;
@@ -854,6 +869,8 @@ struct mlx5_dm {
         */
        spinlock_t lock;
        DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
+       unsigned long *steering_sw_icm_alloc_blocks;
+       unsigned long *header_modify_sw_icm_alloc_blocks;
 };
 
 struct mlx5_read_counters_attr {
index ba35d68e7499735833afe2000fdcd5fe1904f1d6..5f09699fab984ed9179d6bd0775377742f0f6ddc 100644 (file)
@@ -1247,6 +1247,13 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
                mode = MLX5_MKC_ACCESS_MODE_MEMIC;
                start_addr -= pci_resource_start(dev->pdev, 0);
                break;
+       case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+       case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+               if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
+                       return ERR_PTR(-EINVAL);
+
+               mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
+               break;
        default:
                return ERR_PTR(-EINVAL);
        }
index c291fb2f8446174031c478e4be993c492ebd916e..a8f34c2374586bebb6390ff5660004f399ccf822 100644 (file)
@@ -59,6 +59,8 @@ struct mlx5_ib_uapi_devx_async_cmd_hdr {
 
 enum mlx5_ib_uapi_dm_type {
        MLX5_IB_UAPI_DM_TYPE_MEMIC,
+       MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM,
+       MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
 };
 
 #endif