From: Sridhar Samudrala Date: Thu, 24 May 2018 16:55:13 +0000 (-0700) Subject: net: Introduce generic failover module X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=30c8bd5aa8b2c78546c3e52337101b9c85879320;p=openwrt%2Fstaging%2Fblogic.git net: Introduce generic failover module The failover module provides a generic interface for paravirtual drivers to register a netdev and a set of ops with a failover instance. The ops are used as event handlers that get called to handle netdev register/ unregister/link change/name change events on slave pci ethernet devices with the same mac address as the failover netdev. This enables paravirtual drivers to use a VF as an accelerated low latency datapath. It also allows migration of VMs with direct attached VFs by failing over to the paravirtual datapath when the VF is unplugged. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- diff --git a/Documentation/networking/failover.rst b/Documentation/networking/failover.rst new file mode 100644 index 000000000000..f0c8483cdbf5 --- /dev/null +++ b/Documentation/networking/failover.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======== +FAILOVER +======== + +Overview +======== + +The failover module provides a generic interface for paravirtual drivers +to register a netdev and a set of ops with a failover instance. The ops +are used as event handlers that get called to handle netdev register/ +unregister/link change/name change events on slave pci ethernet devices +with the same mac address as the failover netdev. + +This enables paravirtual drivers to use a VF as an accelerated low latency +datapath. It also allows live migration of VMs with direct attached VFs by +failing over to the paravirtual datapath when the VF is unplugged. diff --git a/MAINTAINERS b/MAINTAINERS index f492431b239b..6c59bdf49a8a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5411,6 +5411,14 @@ S: Maintained F: Documentation/hwmon/f71805f F: drivers/hwmon/f71805f.c +FAILOVER MODULE +M: Sridhar Samudrala +L: netdev@vger.kernel.org +S: Supported +F: net/core/failover.c +F: include/net/failover.h +F: Documentation/networking/failover.rst + FANOTIFY M: Jan Kara R: Amir Goldstein diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8452f72087ef..f45b1a4e37ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1425,6 +1425,8 @@ struct net_device_ops { * entity (i.e. the master device for bridged veth) * @IFF_MACSEC: device is a MACsec device * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook + * @IFF_FAILOVER: device is a failover master device + * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1454,6 +1456,8 @@ enum netdev_priv_flags { IFF_PHONY_HEADROOM = 1<<24, IFF_MACSEC = 1<<25, IFF_NO_RX_HANDLER = 1<<26, + IFF_FAILOVER = 1<<27, + IFF_FAILOVER_SLAVE = 1<<28, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1482,6 +1486,8 @@ enum netdev_priv_flags { #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED #define IFF_MACSEC IFF_MACSEC #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER +#define IFF_FAILOVER IFF_FAILOVER +#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE /** * struct net_device - The DEVICE structure. @@ -4336,6 +4342,16 @@ static inline bool netif_is_rxfh_configured(const struct net_device *dev) return dev->priv_flags & IFF_RXFH_CONFIGURED; } +static inline bool netif_is_failover(const struct net_device *dev) +{ + return dev->priv_flags & IFF_FAILOVER; +} + +static inline bool netif_is_failover_slave(const struct net_device *dev) +{ + return dev->priv_flags & IFF_FAILOVER_SLAVE; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --git a/include/net/failover.h b/include/net/failover.h new file mode 100644 index 000000000000..bb15438f39c7 --- /dev/null +++ b/include/net/failover.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _FAILOVER_H +#define _FAILOVER_H + +#include + +struct failover_ops { + int (*slave_pre_register)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_register)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_pre_unregister)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_unregister)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_link_change)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_name_change)(struct net_device *slave_dev, + struct net_device *failover_dev); + rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb); +}; + +struct failover { + struct list_head list; + struct net_device __rcu *failover_dev; + struct failover_ops __rcu *ops; +}; + +struct failover *failover_register(struct net_device *dev, + struct failover_ops *ops); +void failover_unregister(struct failover *failover); +int failover_slave_unregister(struct net_device *slave_dev); + +#endif /* _FAILOVER_H */ diff --git a/net/Kconfig b/net/Kconfig index ba554cedb615..f738a6f27665 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -432,6 +432,19 @@ config MAY_USE_DEVLINK config PAGE_POOL bool +config FAILOVER + tristate "Generic failover module" + help + The failover module provides a generic interface for paravirtual + drivers to register a netdev and a set of ops with a failover + instance. The ops are used as event handlers that get called to + handle netdev register/unregister/link change/name change events + on slave pci ethernet devices with the same mac address as the + failover netdev. This enables paravirtual drivers to use a + VF as an accelerated low latency datapath. It also allows live + migration of VMs with direct attached VFs by failing over to the + paravirtual datapath when the VF is unplugged. + endif # if NET # Used by archs to tell that they support BPF JIT compiler plus which flavour. diff --git a/net/core/Makefile b/net/core/Makefile index 7080417f8bc8..80175e6a2eb8 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -31,3 +31,4 @@ obj-$(CONFIG_DST_CACHE) += dst_cache.o obj-$(CONFIG_HWBM) += hwbm.o obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o +obj-$(CONFIG_FAILOVER) += failover.o diff --git a/net/core/failover.c b/net/core/failover.c new file mode 100644 index 000000000000..4a92a98ccce9 --- /dev/null +++ b/net/core/failover.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +/* A common module to handle registrations and notifications for paravirtual + * drivers to enable accelerated datapath and support VF live migration. + * + * The notifier and event handling code is based on netvsc driver. + */ + +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(failover_list); +static DEFINE_SPINLOCK(failover_lock); + +static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops) +{ + struct net_device *failover_dev; + struct failover *failover; + + spin_lock(&failover_lock); + list_for_each_entry(failover, &failover_list, list) { + failover_dev = rtnl_dereference(failover->failover_dev); + if (ether_addr_equal(failover_dev->perm_addr, mac)) { + *ops = rtnl_dereference(failover->ops); + spin_unlock(&failover_lock); + return failover_dev; + } + } + spin_unlock(&failover_lock); + return NULL; +} + +/** + * failover_slave_register - Register a slave netdev + * + * @slave_dev: slave netdev that is being registered + * + * Registers a slave device to a failover instance. Only ethernet devices + * are supported. + */ +static int failover_slave_register(struct net_device *slave_dev) +{ + struct netdev_lag_upper_info lag_upper_info; + struct net_device *failover_dev; + struct failover_ops *fops; + int err; + + if (slave_dev->type != ARPHRD_ETHER) + goto done; + + ASSERT_RTNL(); + + failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); + if (!failover_dev) + goto done; + + if (fops && fops->slave_pre_register && + fops->slave_pre_register(slave_dev, failover_dev)) + goto done; + + err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame, + failover_dev); + if (err) { + netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n", + err); + goto done; + } + + lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP; + err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL, + &lag_upper_info, NULL); + if (err) { + netdev_err(slave_dev, "can not set failover device %s (err = %d)\n", + failover_dev->name, err); + goto err_upper_link; + } + + slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; + + if (fops && fops->slave_register && + !fops->slave_register(slave_dev, failover_dev)) + return NOTIFY_OK; + + netdev_upper_dev_unlink(slave_dev, failover_dev); + slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; +err_upper_link: + netdev_rx_handler_unregister(slave_dev); +done: + return NOTIFY_DONE; +} + +/** + * failover_slave_unregister - Unregister a slave netdev + * + * @slave_dev: slave netdev that is being unregistered + * + * Unregisters a slave device from a failover instance. + */ +int failover_slave_unregister(struct net_device *slave_dev) +{ + struct net_device *failover_dev; + struct failover_ops *fops; + + if (!netif_is_failover_slave(slave_dev)) + goto done; + + ASSERT_RTNL(); + + failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); + if (!failover_dev) + goto done; + + if (fops && fops->slave_pre_unregister && + fops->slave_pre_unregister(slave_dev, failover_dev)) + goto done; + + netdev_rx_handler_unregister(slave_dev); + netdev_upper_dev_unlink(slave_dev, failover_dev); + slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + + if (fops && fops->slave_unregister && + !fops->slave_unregister(slave_dev, failover_dev)) + return NOTIFY_OK; + +done: + return NOTIFY_DONE; +} +EXPORT_SYMBOL_GPL(failover_slave_unregister); + +static int failover_slave_link_change(struct net_device *slave_dev) +{ + struct net_device *failover_dev; + struct failover_ops *fops; + + if (!netif_is_failover_slave(slave_dev)) + goto done; + + ASSERT_RTNL(); + + failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); + if (!failover_dev) + goto done; + + if (!netif_running(failover_dev)) + goto done; + + if (fops && fops->slave_link_change && + !fops->slave_link_change(slave_dev, failover_dev)) + return NOTIFY_OK; + +done: + return NOTIFY_DONE; +} + +static int failover_slave_name_change(struct net_device *slave_dev) +{ + struct net_device *failover_dev; + struct failover_ops *fops; + + if (!netif_is_failover_slave(slave_dev)) + goto done; + + ASSERT_RTNL(); + + failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); + if (!failover_dev) + goto done; + + if (!netif_running(failover_dev)) + goto done; + + if (fops && fops->slave_name_change && + !fops->slave_name_change(slave_dev, failover_dev)) + return NOTIFY_OK; + +done: + return NOTIFY_DONE; +} + +static int +failover_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + + /* Skip parent events */ + if (netif_is_failover(event_dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + return failover_slave_register(event_dev); + case NETDEV_UNREGISTER: + return failover_slave_unregister(event_dev); + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + return failover_slave_link_change(event_dev); + case NETDEV_CHANGENAME: + return failover_slave_name_change(event_dev); + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block failover_notifier = { + .notifier_call = failover_event, +}; + +static void +failover_existing_slave_register(struct net_device *failover_dev) +{ + struct net *net = dev_net(failover_dev); + struct net_device *dev; + + rtnl_lock(); + for_each_netdev(net, dev) { + if (netif_is_failover(dev)) + continue; + if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) + failover_slave_register(dev); + } + rtnl_unlock(); +} + +/** + * failover_register - Register a failover instance + * + * @dev: failover netdev + * @ops: failover ops + * + * Allocate and register a failover instance for a failover netdev. ops + * provides handlers for slave device register/unregister/link change/ + * name change events. + * + * Return: pointer to failover instance + */ +struct failover *failover_register(struct net_device *dev, + struct failover_ops *ops) +{ + struct failover *failover; + + if (dev->type != ARPHRD_ETHER) + return ERR_PTR(-EINVAL); + + failover = kzalloc(sizeof(*failover), GFP_KERNEL); + if (!failover) + return ERR_PTR(-ENOMEM); + + rcu_assign_pointer(failover->ops, ops); + dev_hold(dev); + dev->priv_flags |= IFF_FAILOVER; + rcu_assign_pointer(failover->failover_dev, dev); + + spin_lock(&failover_lock); + list_add_tail(&failover->list, &failover_list); + spin_unlock(&failover_lock); + + netdev_info(dev, "failover master:%s registered\n", dev->name); + + failover_existing_slave_register(dev); + + return failover; +} +EXPORT_SYMBOL_GPL(failover_register); + +/** + * failover_unregister - Unregister a failover instance + * + * @failover: pointer to failover instance + * + * Unregisters and frees a failover instance. + */ +void failover_unregister(struct failover *failover) +{ + struct net_device *failover_dev; + + failover_dev = rcu_dereference(failover->failover_dev); + + netdev_info(failover_dev, "failover master:%s unregistered\n", + failover_dev->name); + + failover_dev->priv_flags &= ~IFF_FAILOVER; + dev_put(failover_dev); + + spin_lock(&failover_lock); + list_del(&failover->list); + spin_unlock(&failover_lock); + + kfree(failover); +} +EXPORT_SYMBOL_GPL(failover_unregister); + +static __init int +failover_init(void) +{ + register_netdevice_notifier(&failover_notifier); + + return 0; +} +module_init(failover_init); + +static __exit +void failover_exit(void) +{ + unregister_netdevice_notifier(&failover_notifier); +} +module_exit(failover_exit); + +MODULE_DESCRIPTION("Generic failover infrastructure/interface"); +MODULE_LICENSE("GPL v2");