From: Felix Fietkau Date: Mon, 12 Mar 2018 19:51:55 +0000 (+0100) Subject: kernel: add hardware offload patch for flow tables support X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=11a3d270436137fa485257eab8153ce5d307cfa7;p=openwrt%2Fstaging%2Fadrian.git kernel: add hardware offload patch for flow tables support Supports offloading through VLAN, bridge and PPPoE devices as well Signed-off-by: Felix Fietkau --- diff --git a/package/kernel/linux/modules/netfilter.mk b/package/kernel/linux/modules/netfilter.mk index f859a67496..f296a9096e 100644 --- a/package/kernel/linux/modules/netfilter.mk +++ b/package/kernel/linux/modules/netfilter.mk @@ -149,8 +149,9 @@ define KernelPackage/nf-flow CONFIG_NF_FLOW_TABLE_HW DEPENDS:=+kmod-nf-conntrack @!LINUX_3_18 @!LINUX_4_4 @!LINUX_4_9 FILES:= \ - $(LINUX_DIR)/net/netfilter/nf_flow_table.ko - AUTOLOAD:=$(call AutoProbe,nf_flow_table) + $(LINUX_DIR)/net/netfilter/nf_flow_table.ko \ + $(LINUX_DIR)/net/netfilter/nf_flow_table_hw.ko + AUTOLOAD:=$(call AutoProbe,nf_flow_table nf_flow_table_hw) endef $(eval $(call KernelPackage,nf-flow)) diff --git a/target/linux/generic/pending-4.14/640-netfilter-nf_flow_table-add-hardware-offload-support.patch b/target/linux/generic/pending-4.14/640-netfilter-nf_flow_table-add-hardware-offload-support.patch new file mode 100644 index 0000000000..7dc4a010c6 --- /dev/null +++ b/target/linux/generic/pending-4.14/640-netfilter-nf_flow_table-add-hardware-offload-support.patch @@ -0,0 +1,580 @@ +From: Pablo Neira Ayuso +Date: Thu, 11 Jan 2018 16:32:00 +0100 +Subject: [PATCH] netfilter: nf_flow_table: add hardware offload support + +This patch adds the infrastructure to offload flows to hardware, in case +the nic/switch comes with built-in flow tables capabilities. + +If the hardware comes with no hardware flow tables or they have +limitations in terms of features, the existing infrastructure falls back +to the software flow table implementation. + +The software flow table garbage collector skips entries that resides in +the hardware, so the hardware will be responsible for releasing this +flow table entry too via flow_offload_dead(). + +Hardware configuration, either to add or to delete entries, is done from +the hardware offload workqueue, to ensure this is done from user context +given that we may sleep when grabbing the mdio mutex. + +Signed-off-by: Pablo Neira Ayuso +--- + create mode 100644 net/netfilter/nf_flow_table_hw.c + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -826,6 +826,13 @@ struct xfrmdev_ops { + }; + #endif + ++struct flow_offload; ++ ++enum flow_offload_type { ++ FLOW_OFFLOAD_ADD = 0, ++ FLOW_OFFLOAD_DEL, ++}; ++ + /* + * This structure defines the management hooks for network devices. + * The following hooks can be defined; unless noted otherwise, they are +@@ -1057,6 +1064,10 @@ struct xfrmdev_ops { + * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, + * u16 flags); + * ++ * int (*ndo_flow_offload)(enum flow_offload_type type, ++ * struct flow_offload *flow); ++ * Adds/deletes flow entry to/from net device flowtable. ++ * + * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); + * Called to change device carrier. Soft-devices (like dummy, team, etc) + * which do not represent real hardware may define this to allow their +@@ -1281,6 +1292,8 @@ struct net_device_ops { + int (*ndo_bridge_dellink)(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags); ++ int (*ndo_flow_offload)(enum flow_offload_type type, ++ struct flow_offload *flow); + int (*ndo_change_carrier)(struct net_device *dev, + bool new_carrier); + int (*ndo_get_phys_port_id)(struct net_device *dev, +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -20,11 +20,17 @@ struct nf_flowtable_type { + struct module *owner; + }; + ++enum nf_flowtable_flags { ++ NF_FLOWTABLE_F_HW = 0x1, ++}; ++ + struct nf_flowtable { + struct list_head list; + struct rhashtable rhashtable; + const struct nf_flowtable_type *type; ++ u32 flags; + struct delayed_work gc_work; ++ possible_net_t ft_net; + }; + + enum flow_offload_tuple_dir { +@@ -69,6 +75,7 @@ struct flow_offload_tuple_rhash { + #define FLOW_OFFLOAD_DNAT 0x2 + #define FLOW_OFFLOAD_DYING 0x4 + #define FLOW_OFFLOAD_TEARDOWN 0x8 ++#define FLOW_OFFLOAD_HW 0x10 + + struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; +@@ -126,6 +133,22 @@ unsigned int nf_flow_offload_ip_hook(voi + unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + ++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct); ++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow); ++ ++struct nf_flow_table_hw { ++ struct module *owner; ++ void (*add)(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct); ++ void (*del)(struct net *net, struct flow_offload *flow); ++}; ++ ++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload); ++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload); ++ ++extern struct work_struct nf_flow_offload_hw_work; ++ + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ + MODULE_ALIAS("nf-flowtable-" __stringify(family)) + +--- a/include/uapi/linux/netfilter/nf_tables.h ++++ b/include/uapi/linux/netfilter/nf_tables.h +@@ -1341,6 +1341,7 @@ enum nft_object_attributes { + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) + * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64) ++ * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32) + */ + enum nft_flowtable_attributes { + NFTA_FLOWTABLE_UNSPEC, +@@ -1350,6 +1351,7 @@ enum nft_flowtable_attributes { + NFTA_FLOWTABLE_USE, + NFTA_FLOWTABLE_HANDLE, + NFTA_FLOWTABLE_PAD, ++ NFTA_FLOWTABLE_FLAGS, + __NFTA_FLOWTABLE_MAX + }; + #define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -686,6 +686,15 @@ config NF_FLOW_TABLE + + To compile it as a module, choose M here. + ++config NF_FLOW_TABLE_HW ++ tristate "Netfilter flow table hardware offload module" ++ depends on NF_FLOW_TABLE ++ help ++ This option adds hardware offload support for the flow table core ++ infrastructure. ++ ++ To compile it as a module, choose M here. ++ + config NETFILTER_XTABLES + tristate "Netfilter Xtables support (required for ip_tables)" + default m if NETFILTER_ADVANCED=n +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -116,6 +116,7 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_t + nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o + + obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o ++obj-$(CONFIG_NF_FLOW_TABLE_HW) += nf_flow_table_hw.o + + # generic X tables + obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -199,10 +199,16 @@ int flow_offload_add(struct nf_flowtable + } + EXPORT_SYMBOL_GPL(flow_offload_add); + ++static inline bool nf_flow_in_hw(const struct flow_offload *flow) ++{ ++ return flow->flags & FLOW_OFFLOAD_HW; ++} ++ + static void flow_offload_del(struct nf_flowtable *flow_table, + struct flow_offload *flow) + { + struct flow_offload_entry *e; ++ struct net *net = read_pnet(&flow_table->ft_net); + + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, +@@ -214,6 +220,9 @@ static void flow_offload_del(struct nf_f + e = container_of(flow, struct flow_offload_entry, flow); + clear_bit(IPS_OFFLOAD_BIT, &e->ct->status); + ++ if (nf_flow_in_hw(flow)) ++ nf_flow_offload_hw_del(net, flow); ++ + flow_offload_free(flow); + } + +@@ -307,6 +316,7 @@ static int nf_flow_offload_gc_step(struc + rhashtable_walk_start(&hti); + + while ((tuplehash = rhashtable_walk_next(&hti))) { ++ bool teardown; + if (IS_ERR(tuplehash)) { + err = PTR_ERR(tuplehash); + if (err != -EAGAIN) +@@ -319,9 +329,13 @@ static int nf_flow_offload_gc_step(struc + + flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + +- if (nf_flow_has_expired(flow) || +- (flow->flags & (FLOW_OFFLOAD_DYING | +- FLOW_OFFLOAD_TEARDOWN))) ++ teardown = flow->flags & (FLOW_OFFLOAD_DYING | ++ FLOW_OFFLOAD_TEARDOWN); ++ ++ if (nf_flow_in_hw(flow) && !teardown) ++ continue; ++ ++ if (nf_flow_has_expired(flow) || teardown) + flow_offload_del(flow_table, flow); + } + out: +@@ -456,10 +470,43 @@ int nf_flow_dnat_port(const struct flow_ + } + EXPORT_SYMBOL_GPL(nf_flow_dnat_port); + ++static const struct nf_flow_table_hw __rcu *nf_flow_table_hw_hook __read_mostly; ++ ++static int nf_flow_offload_hw_init(struct nf_flowtable *flow_table) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ if (!rcu_access_pointer(nf_flow_table_hw_hook)) ++ request_module("nf-flow-table-hw"); ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (!offload) ++ goto err_no_hw_offload; ++ ++ if (!try_module_get(offload->owner)) ++ goto err_no_hw_offload; ++ ++ rcu_read_unlock(); ++ ++ return 0; ++ ++err_no_hw_offload: ++ rcu_read_unlock(); ++ ++ return -EOPNOTSUPP; ++} ++ + int nf_flow_table_init(struct nf_flowtable *flowtable) + { + int err; + ++ if (flowtable->flags & NF_FLOWTABLE_F_HW) { ++ err = nf_flow_offload_hw_init(flowtable); ++ if (err) ++ return err; ++ } ++ + INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); + + err = rhashtable_init(&flowtable->rhashtable, +@@ -497,6 +544,8 @@ static void nf_flow_table_iterate_cleanu + { + nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); + flush_delayed_work(&flowtable->gc_work); ++ if (flowtable->flags & NF_FLOWTABLE_F_HW) ++ flush_work(&nf_flow_offload_hw_work); + } + + void nf_flow_table_cleanup(struct net *net, struct net_device *dev) +@@ -510,6 +559,26 @@ void nf_flow_table_cleanup(struct net *n + } + EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); + ++struct work_struct nf_flow_offload_hw_work; ++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_work); ++ ++/* Give the hardware workqueue the chance to remove entries from hardware.*/ ++static void nf_flow_offload_hw_free(struct nf_flowtable *flowtable) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ flush_work(&nf_flow_offload_hw_work); ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (!offload) { ++ rcu_read_unlock(); ++ return; ++ } ++ module_put(offload->owner); ++ rcu_read_unlock(); ++} ++ + void nf_flow_table_free(struct nf_flowtable *flow_table) + { + mutex_lock(&flowtable_lock); +@@ -519,9 +588,58 @@ void nf_flow_table_free(struct nf_flowta + nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); + WARN_ON(!nf_flow_offload_gc_step(flow_table)); + rhashtable_destroy(&flow_table->rhashtable); ++ if (flow_table->flags & NF_FLOWTABLE_F_HW) ++ nf_flow_offload_hw_free(flow_table); + } + EXPORT_SYMBOL_GPL(nf_flow_table_free); + ++/* Must be called from user context. */ ++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (offload) ++ offload->add(net, flow, ct); ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_add); ++ ++/* Must be called from user context. */ ++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (offload) ++ offload->del(net, flow); ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_del); ++ ++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload) ++{ ++ if (rcu_access_pointer(nf_flow_table_hw_hook)) ++ return -EBUSY; ++ ++ rcu_assign_pointer(nf_flow_table_hw_hook, offload); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_hw_register); ++ ++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload) ++{ ++ WARN_ON(rcu_access_pointer(nf_flow_table_hw_hook) != offload); ++ rcu_assign_pointer(nf_flow_table_hw_hook, NULL); ++ ++ synchronize_rcu(); ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_hw_unregister); ++ + static int nf_flow_table_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) + { +--- /dev/null ++++ b/net/netfilter/nf_flow_table_hw.c +@@ -0,0 +1,169 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static DEFINE_SPINLOCK(flow_offload_hw_pending_list_lock); ++static LIST_HEAD(flow_offload_hw_pending_list); ++ ++static DEFINE_MUTEX(nf_flow_offload_hw_mutex); ++ ++struct flow_offload_hw { ++ struct list_head list; ++ enum flow_offload_type type; ++ struct flow_offload *flow; ++ struct nf_conn *ct; ++ possible_net_t flow_hw_net; ++}; ++ ++static int do_flow_offload_hw(struct net *net, struct flow_offload *flow, ++ int type) ++{ ++ struct net_device *indev; ++ int ret, ifindex; ++ ++ ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx; ++ indev = dev_get_by_index(net, ifindex); ++ if (WARN_ON(!indev)) ++ return 0; ++ ++ mutex_lock(&nf_flow_offload_hw_mutex); ++ ret = indev->netdev_ops->ndo_flow_offload(type, flow); ++ mutex_unlock(&nf_flow_offload_hw_mutex); ++ ++ dev_put(indev); ++ ++ return ret; ++} ++ ++static void flow_offload_hw_work_add(struct flow_offload_hw *offload) ++{ ++ struct net *net; ++ int ret; ++ ++ if (nf_ct_is_dying(offload->ct)) ++ return; ++ ++ net = read_pnet(&offload->flow_hw_net); ++ ret = do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_ADD); ++ if (ret >= 0) ++ offload->flow->flags |= FLOW_OFFLOAD_HW; ++} ++ ++static void flow_offload_hw_work_del(struct flow_offload_hw *offload) ++{ ++ struct net *net = read_pnet(&offload->flow_hw_net); ++ ++ do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_DEL); ++} ++ ++static void flow_offload_hw_work(struct work_struct *work) ++{ ++ struct flow_offload_hw *offload, *next; ++ LIST_HEAD(hw_offload_pending); ++ ++ spin_lock_bh(&flow_offload_hw_pending_list_lock); ++ list_replace_init(&flow_offload_hw_pending_list, &hw_offload_pending); ++ spin_unlock_bh(&flow_offload_hw_pending_list_lock); ++ ++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) { ++ switch (offload->type) { ++ case FLOW_OFFLOAD_ADD: ++ flow_offload_hw_work_add(offload); ++ break; ++ case FLOW_OFFLOAD_DEL: ++ flow_offload_hw_work_del(offload); ++ break; ++ } ++ if (offload->ct) ++ nf_conntrack_put(&offload->ct->ct_general); ++ list_del(&offload->list); ++ kfree(offload); ++ } ++} ++ ++static void flow_offload_queue_work(struct flow_offload_hw *offload) ++{ ++ spin_lock_bh(&flow_offload_hw_pending_list_lock); ++ list_add_tail(&offload->list, &flow_offload_hw_pending_list); ++ spin_unlock_bh(&flow_offload_hw_pending_list_lock); ++ ++ schedule_work(&nf_flow_offload_hw_work); ++} ++ ++static void flow_offload_hw_add(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct) ++{ ++ struct flow_offload_hw *offload; ++ ++ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC); ++ if (!offload) ++ return; ++ ++ nf_conntrack_get(&ct->ct_general); ++ offload->type = FLOW_OFFLOAD_ADD; ++ offload->ct = ct; ++ offload->flow = flow; ++ write_pnet(&offload->flow_hw_net, net); ++ ++ flow_offload_queue_work(offload); ++} ++ ++static void flow_offload_hw_del(struct net *net, struct flow_offload *flow) ++{ ++ struct flow_offload_hw *offload; ++ ++ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC); ++ if (!offload) ++ return; ++ ++ offload->type = FLOW_OFFLOAD_DEL; ++ offload->ct = NULL; ++ offload->flow = flow; ++ write_pnet(&offload->flow_hw_net, net); ++ ++ flow_offload_queue_work(offload); ++} ++ ++static const struct nf_flow_table_hw flow_offload_hw = { ++ .add = flow_offload_hw_add, ++ .del = flow_offload_hw_del, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init nf_flow_table_hw_module_init(void) ++{ ++ INIT_WORK(&nf_flow_offload_hw_work, flow_offload_hw_work); ++ nf_flow_table_hw_register(&flow_offload_hw); ++ ++ return 0; ++} ++ ++static void __exit nf_flow_table_hw_module_exit(void) ++{ ++ struct flow_offload_hw *offload, *next; ++ LIST_HEAD(hw_offload_pending); ++ ++ nf_flow_table_hw_unregister(&flow_offload_hw); ++ cancel_work_sync(&nf_flow_offload_hw_work); ++ ++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) { ++ if (offload->ct) ++ nf_conntrack_put(&offload->ct->ct_general); ++ list_del(&offload->list); ++ kfree(offload); ++ } ++} ++ ++module_init(nf_flow_table_hw_module_init); ++module_exit(nf_flow_table_hw_module_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Pablo Neira Ayuso "); ++MODULE_ALIAS("nf-flow-table-hw"); +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4866,6 +4866,14 @@ static int nf_tables_flowtable_parse_hoo + if (err < 0) + goto err1; + ++ for (i = 0; i < n; i++) { ++ if (flowtable->data.flags & NF_FLOWTABLE_F_HW && ++ !dev_array[i]->netdev_ops->ndo_flow_offload) { ++ err = -EOPNOTSUPP; ++ goto err1; ++ } ++ } ++ + ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL); + if (!ops) { + err = -ENOMEM; +@@ -4996,10 +5004,19 @@ static int nf_tables_newflowtable(struct + } + + flowtable->data.type = type; ++ write_pnet(&flowtable->data.ft_net, net); ++ + err = type->init(&flowtable->data); + if (err < 0) + goto err3; + ++ if (nla[NFTA_FLOWTABLE_FLAGS]) { ++ flowtable->data.flags = ++ ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); ++ if (flowtable->data.flags & ~NF_FLOWTABLE_F_HW) ++ goto err4; ++ } ++ + err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], + flowtable); + if (err < 0) +@@ -5097,7 +5114,8 @@ static int nf_tables_fill_flowtable_info + nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || + nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || + nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle), +- NFTA_FLOWTABLE_PAD)) ++ NFTA_FLOWTABLE_PAD) || ++ nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags))) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -121,6 +121,9 @@ static void nft_flow_offload_eval(const + if (ret < 0) + goto err_flow_add; + ++ if (flowtable->flags & NF_FLOWTABLE_F_HW) ++ nf_flow_offload_hw_add(nft_net(pkt), flow, ct); ++ + return; + + err_flow_add: diff --git a/target/linux/generic/pending-4.14/641-netfilter-nf_flow_table-support-hw-offload-through-v.patch b/target/linux/generic/pending-4.14/641-netfilter-nf_flow_table-support-hw-offload-through-v.patch new file mode 100644 index 0000000000..58359731fd --- /dev/null +++ b/target/linux/generic/pending-4.14/641-netfilter-nf_flow_table-support-hw-offload-through-v.patch @@ -0,0 +1,302 @@ +From: Felix Fietkau +Date: Thu, 15 Mar 2018 20:46:31 +0100 +Subject: [PATCH] netfilter: nf_flow_table: support hw offload through + virtual interfaces + +There are hardware offload devices that support offloading VLANs and +PPPoE devices. Additionally, it is useful to be able to offload packets +routed through bridge interfaces as well. +Add support for finding the path to the offload device through these +virtual interfaces, while collecting useful parameters for the offload +device, like VLAN ID/protocol, PPPoE session and Ethernet MAC address. + +Signed-off-by: Felix Fietkau +--- + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -827,6 +827,7 @@ struct xfrmdev_ops { + #endif + + struct flow_offload; ++struct flow_offload_hw_path; + + enum flow_offload_type { + FLOW_OFFLOAD_ADD = 0, +@@ -1064,8 +1065,15 @@ enum flow_offload_type { + * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, + * u16 flags); + * ++ * int (*ndo_flow_offload_check)(struct flow_offload_hw_path *path); ++ * For virtual devices like bridges, vlan, and pppoe, fill in the ++ * underlying network device that can be used for offloading connections. ++ * Return an error if offloading is not supported. ++ * + * int (*ndo_flow_offload)(enum flow_offload_type type, +- * struct flow_offload *flow); ++ * struct flow_offload *flow, ++ * struct flow_offload_hw_path *src, ++ * struct flow_offload_hw_path *dest); + * Adds/deletes flow entry to/from net device flowtable. + * + * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); +@@ -1292,8 +1300,11 @@ struct net_device_ops { + int (*ndo_bridge_dellink)(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags); ++ int (*ndo_flow_offload_check)(struct flow_offload_hw_path *path); + int (*ndo_flow_offload)(enum flow_offload_type type, +- struct flow_offload *flow); ++ struct flow_offload *flow, ++ struct flow_offload_hw_path *src, ++ struct flow_offload_hw_path *dest); + int (*ndo_change_carrier)(struct net_device *dev, + bool new_carrier); + int (*ndo_get_phys_port_id)(struct net_device *dev, +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -86,6 +86,21 @@ struct flow_offload { + }; + }; + ++#define FLOW_OFFLOAD_PATH_ETHERNET BIT(0) ++#define FLOW_OFFLOAD_PATH_VLAN BIT(1) ++#define FLOW_OFFLOAD_PATH_PPPOE BIT(2) ++ ++struct flow_offload_hw_path { ++ struct net_device *dev; ++ u32 flags; ++ ++ u8 eth_src[ETH_ALEN]; ++ u8 eth_dest[ETH_ALEN]; ++ u16 vlan_proto; ++ u16 vlan_id; ++ u16 pppoe_sid; ++}; ++ + #define NF_FLOW_TIMEOUT (30 * HZ) + + struct nf_flow_route { +--- a/net/netfilter/nf_flow_table_hw.c ++++ b/net/netfilter/nf_flow_table_hw.c +@@ -19,48 +19,75 @@ struct flow_offload_hw { + enum flow_offload_type type; + struct flow_offload *flow; + struct nf_conn *ct; +- possible_net_t flow_hw_net; ++ ++ struct flow_offload_hw_path src; ++ struct flow_offload_hw_path dest; + }; + +-static int do_flow_offload_hw(struct net *net, struct flow_offload *flow, +- int type) ++static void flow_offload_check_ethernet(struct flow_offload_tuple *tuple, ++ struct flow_offload_hw_path *path) + { +- struct net_device *indev; +- int ret, ifindex; ++ struct net_device *dev = path->dev; ++ struct neighbour *n; + +- ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx; +- indev = dev_get_by_index(net, ifindex); +- if (WARN_ON(!indev)) +- return 0; ++ if (dev->type != ARPHRD_ETHER) ++ return; + +- mutex_lock(&nf_flow_offload_hw_mutex); +- ret = indev->netdev_ops->ndo_flow_offload(type, flow); +- mutex_unlock(&nf_flow_offload_hw_mutex); ++ memcpy(path->eth_src, path->dev->dev_addr, ETH_ALEN); ++ n = dst_neigh_lookup(tuple->dst_cache, &tuple->src_v4); ++ if (!n) ++ return; + +- dev_put(indev); ++ memcpy(path->eth_dest, n->ha, ETH_ALEN); ++ path->flags |= FLOW_OFFLOAD_PATH_ETHERNET; ++ neigh_release(n); ++} + +- return ret; ++static int flow_offload_check_path(struct net *net, ++ struct flow_offload_tuple *tuple, ++ struct flow_offload_hw_path *path) ++{ ++ struct net_device *dev; ++ ++ dev = dev_get_by_index_rcu(net, tuple->iifidx); ++ if (!dev) ++ return -ENOENT; ++ ++ path->dev = dev; ++ flow_offload_check_ethernet(tuple, path); ++ ++ if (dev->netdev_ops->ndo_flow_offload_check) ++ return dev->netdev_ops->ndo_flow_offload_check(path); ++ ++ return 0; + } + +-static void flow_offload_hw_work_add(struct flow_offload_hw *offload) ++static int do_flow_offload_hw(struct flow_offload_hw *offload) + { +- struct net *net; ++ struct net_device *src_dev = offload->src.dev; ++ struct net_device *dest_dev = offload->dest.dev; + int ret; + +- if (nf_ct_is_dying(offload->ct)) +- return; ++ ret = src_dev->netdev_ops->ndo_flow_offload(offload->type, ++ offload->flow, ++ &offload->src, ++ &offload->dest); + +- net = read_pnet(&offload->flow_hw_net); +- ret = do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_ADD); +- if (ret >= 0) +- offload->flow->flags |= FLOW_OFFLOAD_HW; ++ /* restore devices in case the driver mangled them */ ++ offload->src.dev = src_dev; ++ offload->dest.dev = dest_dev; ++ ++ return ret; + } + +-static void flow_offload_hw_work_del(struct flow_offload_hw *offload) ++static void flow_offload_hw_free(struct flow_offload_hw *offload) + { +- struct net *net = read_pnet(&offload->flow_hw_net); +- +- do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_DEL); ++ dev_put(offload->src.dev); ++ dev_put(offload->dest.dev); ++ if (offload->ct) ++ nf_conntrack_put(&offload->ct->ct_general); ++ list_del(&offload->list); ++ kfree(offload); + } + + static void flow_offload_hw_work(struct work_struct *work) +@@ -73,18 +100,22 @@ static void flow_offload_hw_work(struct + spin_unlock_bh(&flow_offload_hw_pending_list_lock); + + list_for_each_entry_safe(offload, next, &hw_offload_pending, list) { ++ mutex_lock(&nf_flow_offload_hw_mutex); + switch (offload->type) { + case FLOW_OFFLOAD_ADD: +- flow_offload_hw_work_add(offload); ++ if (nf_ct_is_dying(offload->ct)) ++ break; ++ ++ if (do_flow_offload_hw(offload) >= 0) ++ offload->flow->flags |= FLOW_OFFLOAD_HW; + break; + case FLOW_OFFLOAD_DEL: +- flow_offload_hw_work_del(offload); ++ do_flow_offload_hw(offload); + break; + } +- if (offload->ct) +- nf_conntrack_put(&offload->ct->ct_general); +- list_del(&offload->list); +- kfree(offload); ++ mutex_unlock(&nf_flow_offload_hw_mutex); ++ ++ flow_offload_hw_free(offload); + } + } + +@@ -97,20 +128,55 @@ static void flow_offload_queue_work(stru + schedule_work(&nf_flow_offload_hw_work); + } + ++static struct flow_offload_hw * ++flow_offload_hw_prepare(struct net *net, struct flow_offload *flow) ++{ ++ struct flow_offload_hw_path src = {}; ++ struct flow_offload_hw_path dest = {}; ++ struct flow_offload_tuple *tuple; ++ struct flow_offload_hw *offload = NULL; ++ ++ rcu_read_lock_bh(); ++ ++ tuple = &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple; ++ if (flow_offload_check_path(net, tuple, &src)) ++ goto out; ++ ++ tuple = &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple; ++ if (flow_offload_check_path(net, tuple, &dest)) ++ goto out; ++ ++ if (!src.dev->netdev_ops->ndo_flow_offload) ++ goto out; ++ ++ offload = kzalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC); ++ if (!offload) ++ goto out; ++ ++ dev_hold(src.dev); ++ dev_hold(dest.dev); ++ offload->src = src; ++ offload->dest = dest; ++ offload->flow = flow; ++ ++out: ++ rcu_read_unlock_bh(); ++ ++ return offload; ++} ++ + static void flow_offload_hw_add(struct net *net, struct flow_offload *flow, + struct nf_conn *ct) + { + struct flow_offload_hw *offload; + +- offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC); ++ offload = flow_offload_hw_prepare(net, flow); + if (!offload) + return; + + nf_conntrack_get(&ct->ct_general); + offload->type = FLOW_OFFLOAD_ADD; + offload->ct = ct; +- offload->flow = flow; +- write_pnet(&offload->flow_hw_net, net); + + flow_offload_queue_work(offload); + } +@@ -119,14 +185,11 @@ static void flow_offload_hw_del(struct n + { + struct flow_offload_hw *offload; + +- offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC); ++ offload = flow_offload_hw_prepare(net, flow); + if (!offload) + return; + + offload->type = FLOW_OFFLOAD_DEL; +- offload->ct = NULL; +- offload->flow = flow; +- write_pnet(&offload->flow_hw_net, net); + + flow_offload_queue_work(offload); + } +@@ -153,12 +216,8 @@ static void __exit nf_flow_table_hw_modu + nf_flow_table_hw_unregister(&flow_offload_hw); + cancel_work_sync(&nf_flow_offload_hw_work); + +- list_for_each_entry_safe(offload, next, &hw_offload_pending, list) { +- if (offload->ct) +- nf_conntrack_put(&offload->ct->ct_general); +- list_del(&offload->list); +- kfree(offload); +- } ++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) ++ flow_offload_hw_free(offload); + } + + module_init(nf_flow_table_hw_module_init); diff --git a/target/linux/generic/pending-4.14/642-net-8021q-support-hardware-flow-table-offload.patch b/target/linux/generic/pending-4.14/642-net-8021q-support-hardware-flow-table-offload.patch new file mode 100644 index 0000000000..cda2e0ffd4 --- /dev/null +++ b/target/linux/generic/pending-4.14/642-net-8021q-support-hardware-flow-table-offload.patch @@ -0,0 +1,56 @@ +From: Felix Fietkau +Date: Thu, 15 Mar 2018 20:49:58 +0100 +Subject: [PATCH] net: 8021q: support hardware flow table offload + +Add the VLAN ID and protocol information + +Signed-off-by: Felix Fietkau +--- + +--- a/net/8021q/vlan_dev.c ++++ b/net/8021q/vlan_dev.c +@@ -29,8 +29,10 @@ + #include + #include + #include ++#include + #include + #include ++#include + + #include "vlan.h" + #include "vlanproc.h" +@@ -762,6 +764,25 @@ static int vlan_dev_get_iflink(const str + return real_dev->ifindex; + } + ++static int vlan_dev_flow_offload_check(struct flow_offload_hw_path *path) ++{ ++ struct net_device *dev = path->dev; ++ struct vlan_dev_priv *vlan = vlan_dev_priv(dev); ++ ++ if (path->flags & FLOW_OFFLOAD_PATH_VLAN) ++ return -EEXIST; ++ ++ path->flags |= FLOW_OFFLOAD_PATH_VLAN; ++ path->vlan_proto = vlan->vlan_proto; ++ path->vlan_id = vlan->vlan_id; ++ path->dev = vlan->real_dev; ++ ++ if (vlan->real_dev->netdev_ops->ndo_flow_offload_check) ++ return vlan->real_dev->netdev_ops->ndo_flow_offload_check(path); ++ ++ return 0; ++} ++ + static const struct ethtool_ops vlan_ethtool_ops = { + .get_link_ksettings = vlan_ethtool_get_link_ksettings, + .get_drvinfo = vlan_ethtool_get_drvinfo, +@@ -799,6 +820,7 @@ static const struct net_device_ops vlan_ + .ndo_fix_features = vlan_dev_fix_features, + .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, + .ndo_get_iflink = vlan_dev_get_iflink, ++ .ndo_flow_offload_check = vlan_dev_flow_offload_check, + }; + + static void vlan_dev_free(struct net_device *dev) diff --git a/target/linux/generic/pending-4.14/643-net-bridge-support-hardware-flow-table-offload.patch b/target/linux/generic/pending-4.14/643-net-bridge-support-hardware-flow-table-offload.patch new file mode 100644 index 0000000000..3158714f54 --- /dev/null +++ b/target/linux/generic/pending-4.14/643-net-bridge-support-hardware-flow-table-offload.patch @@ -0,0 +1,55 @@ +From: Felix Fietkau +Date: Thu, 15 Mar 2018 20:50:37 +0100 +Subject: [PATCH] net: bridge: support hardware flow table offload + +Look up the real device and pass it on + +Signed-off-by: Felix Fietkau +--- + +--- a/net/bridge/br_device.c ++++ b/net/bridge/br_device.c +@@ -18,6 +18,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include "br_private.h" +@@ -340,6 +342,26 @@ static const struct ethtool_ops br_ethto + .get_link = ethtool_op_get_link, + }; + ++static int br_flow_offload_check(struct flow_offload_hw_path *path) ++{ ++ struct net_device *dev = path->dev; ++ struct net_bridge *br = netdev_priv(dev); ++ struct net_bridge_fdb_entry *dst; ++ ++ if (!(path->flags & FLOW_OFFLOAD_PATH_ETHERNET)) ++ return -EINVAL; ++ ++ dst = br_fdb_find_rcu(br, path->eth_dest, path->vlan_id); ++ if (!dst || !dst->dst) ++ return -ENOENT; ++ ++ path->dev = dst->dst->dev; ++ if (path->dev->netdev_ops->ndo_flow_offload_check) ++ return path->dev->netdev_ops->ndo_flow_offload_check(path); ++ ++ return 0; ++} ++ + static const struct net_device_ops br_netdev_ops = { + .ndo_open = br_dev_open, + .ndo_stop = br_dev_stop, +@@ -367,6 +389,7 @@ static const struct net_device_ops br_ne + .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, + .ndo_features_check = passthru_features_check, ++ .ndo_flow_offload_check = br_flow_offload_check, + }; + + static struct device_type br_type = { diff --git a/target/linux/generic/pending-4.14/644-net-pppoe-support-hardware-flow-table-offload.patch b/target/linux/generic/pending-4.14/644-net-pppoe-support-hardware-flow-table-offload.patch new file mode 100644 index 0000000000..9259fe8df7 --- /dev/null +++ b/target/linux/generic/pending-4.14/644-net-pppoe-support-hardware-flow-table-offload.patch @@ -0,0 +1,110 @@ +From: Felix Fietkau +Date: Thu, 15 Mar 2018 21:15:00 +0100 +Subject: [PATCH] net: pppoe: support hardware flow table offload + +Pass on the PPPoE session ID and the remote MAC address + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -56,6 +56,9 @@ + #include + #include + ++#include ++#include ++ + #define PPP_VERSION "2.4.2" + + /* +@@ -1383,12 +1386,33 @@ static void ppp_dev_priv_destructor(stru + ppp_destroy_interface(ppp); + } + ++static int ppp_flow_offload_check(struct flow_offload_hw_path *path) ++{ ++ struct ppp *ppp = netdev_priv(path->dev); ++ struct ppp_channel *chan; ++ struct channel *pch; ++ ++ if (ppp->flags & SC_MULTILINK) ++ return -EOPNOTSUPP; ++ ++ if (list_empty(&ppp->channels)) ++ return -ENODEV; ++ ++ pch = list_first_entry(&ppp->channels, struct channel, clist); ++ chan = pch->chan; ++ if (!chan->ops->flow_offload_check) ++ return -EOPNOTSUPP; ++ ++ return chan->ops->flow_offload_check(chan, path); ++} ++ + static const struct net_device_ops ppp_netdev_ops = { + .ndo_init = ppp_dev_init, + .ndo_uninit = ppp_dev_uninit, + .ndo_start_xmit = ppp_start_xmit, + .ndo_do_ioctl = ppp_net_ioctl, + .ndo_get_stats64 = ppp_get_stats64, ++ .ndo_flow_offload_check = ppp_flow_offload_check, + }; + + static struct device_type ppp_type = { +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -77,6 +77,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -970,8 +972,32 @@ static int pppoe_xmit(struct ppp_channel + return __pppoe_xmit(sk, skb); + } + ++static int pppoe_flow_offload_check(struct ppp_channel *chan, ++ struct flow_offload_hw_path *path) ++{ ++ struct sock *sk = (struct sock *)chan->private; ++ struct pppox_sock *po = pppox_sk(sk); ++ struct net_device *dev = po->pppoe_dev; ++ ++ if (sock_flag(sk, SOCK_DEAD) || ++ !(sk->sk_state & PPPOX_CONNECTED) || !dev) ++ return -ENODEV; ++ ++ path->dev = po->pppoe_dev; ++ path->flags |= FLOW_OFFLOAD_PATH_PPPOE; ++ memcpy(path->eth_src, po->pppoe_dev->dev_addr, ETH_ALEN); ++ memcpy(path->eth_dest, po->pppoe_pa.remote, ETH_ALEN); ++ path->pppoe_sid = be16_to_cpu(po->num); ++ ++ if (path->dev->netdev_ops->ndo_flow_offload_check) ++ return path->dev->netdev_ops->ndo_flow_offload_check(path); ++ ++ return 0; ++} ++ + static const struct ppp_channel_ops pppoe_chan_ops = { + .start_xmit = pppoe_xmit, ++ .flow_offload_check = pppoe_flow_offload_check, + }; + + static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, +--- a/include/linux/ppp_channel.h ++++ b/include/linux/ppp_channel.h +@@ -32,6 +32,8 @@ struct ppp_channel_ops { + int (*start_xmit)(struct ppp_channel *, struct sk_buff *); + /* Handle an ioctl call that has come in via /dev/ppp. */ + int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long); ++ ++ int (*flow_offload_check)(struct ppp_channel *, struct flow_offload_hw_path *); + }; + + struct ppp_channel { diff --git a/target/linux/generic/pending-4.14/645-netfilter-nf_flow_table-rework-hardware-offload-time.patch b/target/linux/generic/pending-4.14/645-netfilter-nf_flow_table-rework-hardware-offload-time.patch new file mode 100644 index 0000000000..8da15bc336 --- /dev/null +++ b/target/linux/generic/pending-4.14/645-netfilter-nf_flow_table-rework-hardware-offload-time.patch @@ -0,0 +1,37 @@ +From: Felix Fietkau +Date: Sun, 25 Mar 2018 21:10:55 +0200 +Subject: [PATCH] netfilter: nf_flow_table: rework hardware offload timeout + handling + +Some offload implementations send keepalive packets + explicit +notifications of TCP FIN/RST packets. In this case it is more convenient +to simply let the driver update flow->timeout handling and use the +regular flow offload gc step. + +For drivers that manage their own lifetime, a separate flag can be set +to avoid gc timeouts. + +Signed-off-by: Felix Fietkau +--- + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -76,6 +76,7 @@ struct flow_offload_tuple_rhash { + #define FLOW_OFFLOAD_DYING 0x4 + #define FLOW_OFFLOAD_TEARDOWN 0x8 + #define FLOW_OFFLOAD_HW 0x10 ++#define FLOW_OFFLOAD_KEEP 0x20 + + struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -332,7 +332,7 @@ static int nf_flow_offload_gc_step(struc + teardown = flow->flags & (FLOW_OFFLOAD_DYING | + FLOW_OFFLOAD_TEARDOWN); + +- if (nf_flow_in_hw(flow) && !teardown) ++ if ((flow->flags & FLOW_OFFLOAD_KEEP) && !teardown) + continue; + + if (nf_flow_has_expired(flow) || teardown)