#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include <linux/xarray.h>
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
};
+/* first level XA value data structure */
+struct devx_event {
+ struct xarray object_ids; /* second XA level, Key = object id */
+ struct list_head unaffiliated_list;
+};
+
+/* second level XA value data structure */
+struct devx_obj_event {
+ struct rcu_head rcu;
+ struct list_head obj_sub_list;
+};
+
+struct devx_event_subscription {
+ struct list_head file_list; /* headed in ev_file->
+ * subscribed_events_list
+ */
+ struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
+ * devx_obj_event->obj_sub_list
+ */
+ struct list_head obj_list; /* headed in devx_object */
+ struct list_head event_list; /* headed in ev_file->event_list or in
+ * temp list via subscription
+ */
+
+ u8 is_cleaned:1;
+ u32 xa_key_level1;
+ u32 xa_key_level2;
+ struct rcu_head rcu;
+ u64 cookie;
+ struct devx_async_event_file *ev_file;
+ struct file *filp; /* Upon hot unplug we need a direct access to */
+ struct eventfd_ctx *eventfd;
+};
+
struct devx_async_event_file {
struct ib_uobject uobj;
/* Head of events that are subscribed to this FD */
struct mlx5_ib_devx_mr devx_mr;
struct mlx5_core_dct core_dct;
};
+ struct list_head event_sub; /* holds devx_event_subscription entries */
};
struct devx_umem {
return false;
}
+static bool is_legacy_unaffiliated_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_legacy_obj_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_COMP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static u16 get_legacy_obj_type(u16 opcode)
+{
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_EVENT_QUEUE_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_EVENT_QUEUE_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_EVENT_QUEUE_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
+{
+ u16 opcode;
+
+ opcode = (obj->obj_id >> 32) & 0xffff;
+
+ if (is_legacy_obj_event_num(event_num))
+ return get_legacy_obj_type(opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ return (obj->obj_id >> 48);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_OBJ_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_OBJ_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_OBJ_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_OBJ_TYPE_DCT;
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_OBJ_TYPE_TIR;
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_OBJ_TYPE_TIS;
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_OBJ_TYPE_PSV;
+ case MLX5_OBJ_TYPE_MKEY:
+ return MLX5_OBJ_TYPE_MKEY;
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_OBJ_TYPE_RMP;
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_OBJ_TYPE_XRC_SRQ;
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_OBJ_TYPE_XRQ;
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_OBJ_TYPE_RQT;
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_OBJ_TYPE_FLOW_COUNTER;
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_OBJ_TYPE_CQ;
+ default:
+ return 0;
+ }
+}
+
+static u32 get_dec_obj_id(u64 obj_id)
+{
+ return (obj_id & 0xffffffff);
+}
+
/*
* As the obj_id in the firmware is not globally unique the object type
* must be considered upon checking for a valid object id.
mlx5_base_mkey(obj->devx_mr.mmkey.key));
}
+static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
+ struct devx_event_subscription *sub)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ if (sub->is_cleaned)
+ return;
+
+ sub->is_cleaned = 1;
+ list_del_rcu(&sub->xa_list);
+
+ if (list_empty(&sub->obj_list))
+ return;
+
+ list_del_rcu(&sub->obj_list);
+ /* check whether key level 1 for this obj_sub_list is empty */
+ event = xa_load(&dev->devx_event_table.event_xa,
+ sub->xa_key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ sub->xa_key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
static int devx_obj_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
{
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_devx_event_table *devx_event_table;
struct devx_obj *obj = uobject->object;
+ struct devx_event_subscription *sub_entry, *tmp;
+ struct mlx5_ib_dev *dev;
int ret;
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
devx_cleanup_mkey(obj);
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- struct mlx5_ib_dev *dev =
- mlx5_udata_to_mdev(&attrs->driver_udata);
+ devx_event_table = &dev->devx_event_table;
+
+ mutex_lock(&devx_event_table->event_xa_lock);
+ list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
+ devx_cleanup_subscription(dev, sub_entry);
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
devx_free_indirect_mkey);
return ret;
uobj->object = obj;
obj->mdev = dev->mdev;
+ INIT_LIST_HEAD(&obj->event_sub);
devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
ev_file->omit_data = 1;
INIT_LIST_HEAD(&ev_file->subscribed_events_list);
ev_file->dev = dev;
+ get_device(&dev->ib_dev.dev);
return 0;
}
return err;
}
+static void
+subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ /* Level 1 is valid for future use, no need to free */
+ if (!is_level2)
+ return;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids,
+ key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
+static int
+subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_obj_event *obj_event;
+ struct devx_event *event;
+ int err;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ if (!event) {
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&event->unaffiliated_list);
+ xa_init(&event->object_ids);
+
+ err = xa_insert(&devx_event_table->event_xa,
+ key_level1,
+ event,
+ GFP_KERNEL);
+ if (err) {
+ kfree(event);
+ return err;
+ }
+ }
+
+ if (!is_level2)
+ return 0;
+
+ obj_event = xa_load(&event->object_ids, key_level2);
+ if (!obj_event) {
+ obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
+ if (!obj_event)
+ /* Level1 is valid for future use, no need to free */
+ return -ENOMEM;
+
+ err = xa_insert(&event->object_ids,
+ key_level2,
+ obj_event,
+ GFP_KERNEL);
+ if (err)
+ return err;
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
+ }
+
+ return 0;
+}
+
+static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ int i;
+
+ for (i = 0; i < num_events; i++) {
+ if (obj) {
+ if (!is_legacy_obj_event_num(event_type_num_list[i]))
+ return false;
+ } else if (!is_legacy_unaffiliated_event_num(
+ event_type_num_list[i])) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#define MAX_SUPP_EVENT_NUM 255
+static bool is_valid_events(struct mlx5_core_dev *dev,
+ int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ __be64 *aff_events;
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+ int i;
+
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ aff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_affiliated_events);
+ unaff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ } else {
+ return is_valid_events_legacy(num_events, event_type_num_list,
+ obj);
+ }
+
+ for (i = 0; i < num_events; i++) {
+ if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
+ return false;
+
+ mask_entry = event_type_num_list[i] / 64;
+ mask_bit = event_type_num_list[i] % 64;
+
+ if (obj) {
+ /* CQ completion */
+ if (event_type_num_list[i] == 0)
+ continue;
+
+ if (!(be64_to_cpu(aff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+
+ continue;
+ }
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+ }
+
+ return true;
+}
+
+#define MAX_NUM_EVENTS 16
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
+ attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ struct ib_uobject *fd_uobj;
+ struct devx_obj *obj = NULL;
+ struct devx_async_event_file *ev_file;
+ struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
+ u16 *event_type_num_list;
+ struct devx_event_subscription *event_sub, *tmp_sub;
+ struct list_head sub_list;
+ int redirect_fd;
+ bool use_eventfd = false;
+ int num_events;
+ int num_alloc_xa_entries = 0;
+ u16 obj_type = 0;
+ u64 cookie = 0;
+ u32 obj_id = 0;
+ int err;
+ int i;
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!IS_ERR(devx_uobj)) {
+ obj = (struct devx_obj *)devx_uobj->object;
+ if (obj)
+ obj_id = get_dec_obj_id(obj->obj_id);
+ }
+
+ fd_uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
+ if (IS_ERR(fd_uobj))
+ return PTR_ERR(fd_uobj);
+
+ ev_file = container_of(fd_uobj, struct devx_async_event_file,
+ uobj);
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
+ err = uverbs_copy_from(&redirect_fd, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
+ if (err)
+ return err;
+
+ use_eventfd = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
+ if (use_eventfd)
+ return -EINVAL;
+
+ err = uverbs_copy_from(&cookie, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
+ if (err)
+ return err;
+ }
+
+ num_events = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ sizeof(u16));
+
+ if (num_events < 0)
+ return num_events;
+
+ if (num_events > MAX_NUM_EVENTS)
+ return -EINVAL;
+
+ event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
+
+ if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&sub_list);
+
+ /* Protect from concurrent subscriptions to same XA entries to allow
+ * both to succeed
+ */
+ mutex_lock(&devx_event_table->event_xa_lock);
+ for (i = 0; i < num_events; i++) {
+ u32 key_level1;
+
+ if (obj)
+ obj_type = get_dec_obj_type(obj,
+ event_type_num_list[i]);
+ key_level1 = event_type_num_list[i] | obj_type << 16;
+
+ err = subscribe_event_xa_alloc(devx_event_table,
+ key_level1,
+ obj,
+ obj_id);
+ if (err)
+ goto err;
+
+ num_alloc_xa_entries++;
+ event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
+ if (!event_sub)
+ goto err;
+
+ list_add_tail(&event_sub->event_list, &sub_list);
+ if (use_eventfd) {
+ event_sub->eventfd =
+ eventfd_ctx_fdget(redirect_fd);
+
+ if (IS_ERR(event_sub)) {
+ err = PTR_ERR(event_sub->eventfd);
+ event_sub->eventfd = NULL;
+ goto err;
+ }
+ }
+
+ event_sub->cookie = cookie;
+ event_sub->ev_file = ev_file;
+ event_sub->filp = fd_uobj->object;
+ /* May be needed upon cleanup the devx object/subscription */
+ event_sub->xa_key_level1 = key_level1;
+ event_sub->xa_key_level2 = obj_id;
+ INIT_LIST_HEAD(&event_sub->obj_list);
+ }
+
+ /* Once all the allocations and the XA data insertions were done we
+ * can go ahead and add all the subscriptions to the relevant lists
+ * without concern of a failure.
+ */
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+
+ list_del_init(&event_sub->event_list);
+
+ spin_lock_irq(&ev_file->lock);
+ list_add_tail_rcu(&event_sub->file_list,
+ &ev_file->subscribed_events_list);
+ spin_unlock_irq(&ev_file->lock);
+
+ event = xa_load(&devx_event_table->event_xa,
+ event_sub->xa_key_level1);
+ WARN_ON(!event);
+
+ if (!obj) {
+ list_add_tail_rcu(&event_sub->xa_list,
+ &event->unaffiliated_list);
+ continue;
+ }
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ WARN_ON(!obj_event);
+ list_add_tail_rcu(&event_sub->xa_list,
+ &obj_event->obj_sub_list);
+ list_add_tail_rcu(&event_sub->obj_list,
+ &obj->event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return 0;
+
+err:
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ list_del(&event_sub->event_list);
+
+ subscribe_event_xa_dealloc(devx_event_table,
+ event_sub->xa_key_level1,
+ obj,
+ obj_id);
+
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+
+ kfree(event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return err;
+}
+
static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
struct uverbs_attr_bundle *attrs,
struct devx_umem *obj)
void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
{
struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ struct devx_event_subscription *sub, *tmp;
+ struct devx_event *event;
void *entry;
unsigned long id;
mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
-
- xa_for_each(&table->event_xa, id, entry)
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ xa_for_each(&table->event_xa, id, entry) {
+ event = entry;
+ list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
+ xa_list)
+ devx_cleanup_subscription(dev, sub);
kfree(entry);
-
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
xa_destroy(&table->event_xa);
}
static int devx_async_event_close(struct inode *inode, struct file *filp)
{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ struct devx_event_subscription *event_sub, *event_sub_tmp;
+
+ mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock);
+ /* delete the subscriptions which are related to this FD */
+ list_for_each_entry_safe(event_sub, event_sub_tmp,
+ &ev_file->subscribed_events_list, file_list) {
+ devx_cleanup_subscription(ev_file->dev, event_sub);
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+
+ list_del_rcu(&event_sub->file_list);
+ /* subscription may not be used by the read API any more */
+ kfree_rcu(event_sub, rcu);
+ }
+
+ mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock);
+
uverbs_close_fd(filp);
+ put_device(&ev_file->dev->ib_dev.dev);
return 0;
}
UVERBS_ATTR_TYPE(u64),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
- &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),