RDMA/core: Add a netlink command to change net namespace of rdma device
author Parav Pandit <parav@mellanox.com>
Mon, 15 Apr 2019 10:22:51 +0000 (13:22 +0300)
committer Jason Gunthorpe <jgg@mellanox.com>
Mon, 22 Apr 2019 17:44:58 +0000 (14:44 -0300)
Provide an option to change the net namespace of an rdma device through a
netlink command. When multiple rdma devices exist in a system, and when
containers are used, this will limit rdma device visibility to a specified
net namespace.

An example command to change the net namespace of the mlx5_1 device to the
previously created net namespace 'foo' is:

$ ip netns add foo
$ rdma dev set mlx5_1 netns foo

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/nldev.c
include/uapi/rdma/rdma_netlink.h

index 5b0ffbb6b3c9c03816a3e5736d718fc3fa10d4e0..d4dd360769cbbe075ef6eeb24c69ee2e4c7084b6 100644 (file)
@@ -350,4 +350,6 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
                                 const char *name);
 void ib_port_unregister_module_stat(struct kobject *kobj);
 
+int ib_device_set_netns_put(struct sk_buff *skb,
+                           struct ib_device *dev, u32 ns_fd);
 #endif /* _CORE_PRIV_H */
index 7fe4f8b880ee64148000a6b979e11c77289d9e48..fcbf2d4c865dec089cfd26a8679108db5d12a7bf 100644 (file)
@@ -1452,9 +1452,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
        mutex_lock(&device->unregistration_lock);
 
        /*
-        * If a device not under ib_device_get() or the unregistration_lock
-        * the namespace can be changed, or it can be unregistered. Check
-        * again under the lock.
+        * If a device not under ib_device_get() or if the unregistration_lock
+        * is not held, the namespace can be changed, or it can be unregistered.
+        * Check again under the lock.
         */
        if (refcount_read(&device->refcount) == 0 ||
            !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
@@ -1471,12 +1471,12 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
         */
        write_pnet(&device->coredev.rdma_net, net);
 
+       down_read(&devices_rwsem);
        /*
         * Currently rdma devices are system wide unique. So the device name
         * is guaranteed free in the new namespace. Publish the new namespace
         * at the sysfs level.
         */
-       down_read(&devices_rwsem);
        ret = device_rename(&device->dev, dev_name(&device->dev));
        up_read(&devices_rwsem);
        if (ret) {
@@ -1488,7 +1488,7 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
        }
 
        ret2 = enable_device_and_get(device);
-       if (ret2)
+       if (ret2) {
                /*
                 * This shouldn't really happen, but if it does, let the user
                 * retry at later point. So don't disable the device.
@@ -1496,7 +1496,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
                dev_warn(&device->dev,
                         "%s: Couldn't re-enable device after namespace change\n",
                         __func__);
+       }
        kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
        ib_device_put(device);
 out:
        mutex_unlock(&device->unregistration_lock);
@@ -1505,6 +1507,50 @@ out:
        return ret2;
 }
 
+int ib_device_set_netns_put(struct sk_buff *skb,
+                           struct ib_device *dev, u32 ns_fd)
+{
+       struct net *net;
+       int ret;
+
+       net = get_net_ns_by_fd(ns_fd);
+       if (IS_ERR(net)) {
+               ret = PTR_ERR(net);
+               goto net_err;
+       }
+
+       if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+               ret = -EPERM;
+               goto ns_err;
+       }
+
+       /*
+        * Currently supported only for those providers which support
+        * disassociation and don't do port specific sysfs init. Once a
+        * port_cleanup infrastructure is implemented, this limitation will be
+        * removed.
+        */
+       if (!dev->ops.disassociate_ucontext || dev->ops.init_port ||
+           ib_devices_shared_netns) {
+               ret = -EOPNOTSUPP;
+               goto ns_err;
+       }
+
+       get_device(&dev->dev);
+       ib_device_put(dev);
+       ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net);
+       put_device(&dev->dev);
+
+       put_net(net);
+       return ret;
+
+ns_err:
+       put_net(net);
+net_err:
+       ib_device_put(dev);
+       return ret;
+}
+
 static struct pernet_operations rdma_dev_net_ops = {
        .init = rdma_dev_init_net,
        .exit = rdma_dev_exit_net,
index 8cb3851d212ecd244a4e10387f2321df0eee53b8..bced945a456d3eba62af3e339e3ebd6d2ce1ff2f 100644 (file)
@@ -119,6 +119,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
        [RDMA_NLDEV_SYS_ATTR_NETNS_MODE]        = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_DEV_PROTOCOL]          = { .type = NLA_NUL_STRING,
                                    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
+       [RDMA_NLDEV_NET_NS_FD]                  = { .type = NLA_U32 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -691,9 +692,20 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
                            IB_DEVICE_NAME_MAX);
                err = ib_device_rename(device, name);
+               goto done;
        }
 
+       if (tb[RDMA_NLDEV_NET_NS_FD]) {
+               u32 ns_fd;
+
+               ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
+               err = ib_device_set_netns_put(skb, device, ns_fd);
+               goto put_done;
+       }
+
+done:
        ib_device_put(device);
+put_done:
        return err;
 }
 
@@ -909,7 +921,6 @@ static int _nldev_res_get_dumpit(struct ib_device *device,
                nlmsg_cancel(skb, nlh);
                goto out;
        }
-
        nlmsg_end(skb, nlh);
 
        idx++;
index d49f491341f63da6cfebd3dc36b49abb8460516a..42a8bdc40a14e6d984e570926074019007f8a7a8 100644 (file)
@@ -469,12 +469,16 @@ enum rdma_nldev_attr {
         * either shared or exclusive among multiple net namespaces.
         */
        RDMA_NLDEV_SYS_ATTR_NETNS_MODE,         /* u8 */
-
        /*
         * Device protocol, e.g. ib, iw, usnic, roce and opa
         */
        RDMA_NLDEV_ATTR_DEV_PROTOCOL,           /* string */
 
+       /*
+        * File descriptor handle of the net namespace object
+        */
+       RDMA_NLDEV_NET_NS_FD,                   /* u32 */
+
        /*
         * Always the end
         */