From: Felix Fietkau Date: Tue, 9 Dec 2014 11:01:49 +0000 (+0000) Subject: kernel: add a patch to make netfilter conntrack cache routing information X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=27f36718d31026fadbf2b627eed2d261e98493f6;p=openwrt%2Fstaging%2Fnbd.git kernel: add a patch to make netfilter conntrack cache routing information Significantly improves routing / NAT performance Signed-off-by: Felix Fietkau SVN-Revision: 43587 --- diff --git a/include/netfilter.mk b/include/netfilter.mk index ead32cb183..660bcc3d5d 100644 --- a/include/netfilter.mk +++ b/include/netfilter.mk @@ -66,6 +66,7 @@ $(eval $(if $(NF_KMOD),,$(call nf_add,IPT_CORE,CONFIG_NETFILTER_XT_MARK, $(P_XT) # kernel only $(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_CONNTRACK, $(P_XT)nf_conntrack),)) +$(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_CONNTRACK_RTCACHE, $(P_XT)nf_conntrack_rtcache),)) $(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_DEFRAG_IPV4, $(P_V4)nf_defrag_ipv4),)) $(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_CONNTRACK_IPV4, $(P_V4)nf_conntrack_ipv4),)) diff --git a/target/linux/generic/patches-3.14/090-backport_netfilter_rtcache.patch b/target/linux/generic/patches-3.14/090-backport_netfilter_rtcache.patch new file mode 100644 index 0000000000..ebe573f576 --- /dev/null +++ b/target/linux/generic/patches-3.14/090-backport_netfilter_rtcache.patch @@ -0,0 +1,504 @@ +Subject: netfilter: conntrack: cache route for forwarded connections + +... to avoid per-packet FIB lookup if possible. + +The cached dst is re-used provided the input interface +is the same as that of the previous packet in the same direction. + +If not, the cached dst is invalidated. + +For ipv6 we also need to store sernum, else dst_check doesn't work, +pointed out by Eric Dumazet. + +This should speed up forwarding when conntrack is already in use +anyway, especially when using reverse path filtering -- active RPF +enforces two FIB lookups for each packet. + +Before the routing cache removal this didn't matter since RPF was performed +only when route cache didn't yield a result; but without route cache it +comes at higher price. + +Julian Anastasov suggested to add NETDEV_UNREGISTER handler to +avoid holding on to dsts of 'frozen' conntracks. + +Signed-off-by: Florian Westphal + +--- a/include/net/netfilter/nf_conntrack_extend.h ++++ b/include/net/netfilter/nf_conntrack_extend.h +@@ -30,6 +30,9 @@ enum nf_ct_ext_id { + #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + NF_CT_EXT_SYNPROXY, + #endif ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE) ++ NF_CT_EXT_RTCACHE, ++#endif + NF_CT_EXT_NUM, + }; + +@@ -43,6 +46,7 @@ enum nf_ct_ext_id { + #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout + #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels + #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy ++#define NF_CT_EXT_RTCACHE_TYPE struct nf_conn_rtcache + + /* Extensions: optional stuff which isn't permanently in struct. */ + struct nf_ct_ext { +--- /dev/null ++++ b/include/net/netfilter/nf_conntrack_rtcache.h +@@ -0,0 +1,34 @@ ++#include ++#include ++#include ++ ++struct dst_entry; ++ ++struct nf_conn_dst_cache { ++ struct dst_entry *dst; ++ int iif; ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++ u32 cookie; ++#endif ++ ++}; ++ ++struct nf_conn_rtcache { ++ struct nf_conn_dst_cache cached_dst[IP_CT_DIR_MAX]; ++}; ++ ++static inline ++struct nf_conn_rtcache *nf_ct_rtcache_find(const struct nf_conn *ct) ++{ ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE) ++ return nf_ct_ext_find(ct, NF_CT_EXT_RTCACHE); ++#else ++ return NULL; ++#endif ++} ++ ++static inline int nf_conn_rtcache_iif_get(const struct nf_conn_rtcache *rtc, ++ enum ip_conntrack_dir dir) ++{ ++ return rtc->cached_dst[dir].iif; ++} +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -103,6 +103,18 @@ config NF_CONNTRACK_EVENTS + + If unsure, say `N'. + ++config NF_CONNTRACK_RTCACHE ++ tristate "Cache route entries in conntrack objects" ++ depends on NETFILTER_ADVANCED ++ depends on NF_CONNTRACK ++ help ++ If this option is enabled, the connection tracking code will ++ cache routing information for each connection that is being ++ forwarded, at a cost of 32 bytes per conntrack object. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ The module will be called nf_conntrack_rtcache. ++ + config NF_CONNTRACK_TIMEOUT + bool 'Connection tracking timeout' + depends on NETFILTER_ADVANCED +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -18,6 +18,9 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += n + # connection tracking + obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o + ++# optional conntrack route cache extension ++obj-$(CONFIG_NF_CONNTRACK_RTCACHE) += nf_conntrack_rtcache.o ++ + # SCTP protocol connection tracking + obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o + obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o +--- /dev/null ++++ b/net/netfilter/nf_conntrack_rtcache.c +@@ -0,0 +1,386 @@ ++/* route cache for netfilter. ++ * ++ * (C) 2014 Red Hat GmbH ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++#include ++#endif ++ ++static void __nf_conn_rtcache_destroy(struct nf_conn_rtcache *rtc, ++ enum ip_conntrack_dir dir) ++{ ++ struct dst_entry *dst = rtc->cached_dst[dir].dst; ++ ++ dst_release(dst); ++} ++ ++static void nf_conn_rtcache_destroy(struct nf_conn *ct) ++{ ++ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct); ++ ++ if (!rtc) ++ return; ++ ++ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_ORIGINAL); ++ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_REPLY); ++} ++ ++static void nf_ct_rtcache_ext_add(struct nf_conn *ct) ++{ ++ struct nf_conn_rtcache *rtc; ++ ++ rtc = nf_ct_ext_add(ct, NF_CT_EXT_RTCACHE, GFP_ATOMIC); ++ if (rtc) { ++ rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif = -1; ++ rtc->cached_dst[IP_CT_DIR_ORIGINAL].dst = NULL; ++ rtc->cached_dst[IP_CT_DIR_REPLY].iif = -1; ++ rtc->cached_dst[IP_CT_DIR_REPLY].dst = NULL; ++ } ++} ++ ++static struct nf_conn_rtcache *nf_ct_rtcache_find_usable(struct nf_conn *ct) ++{ ++ if (nf_ct_is_untracked(ct)) ++ return NULL; ++ return nf_ct_rtcache_find(ct); ++} ++ ++static struct dst_entry * ++nf_conn_rtcache_dst_get(const struct nf_conn_rtcache *rtc, ++ enum ip_conntrack_dir dir) ++{ ++ return rtc->cached_dst[dir].dst; ++} ++ ++static u32 nf_rtcache_get_cookie(int pf, const struct dst_entry *dst) ++{ ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++ if (pf == NFPROTO_IPV6) { ++ const struct rt6_info *rt = (const struct rt6_info *)dst; ++ ++ if (rt->rt6i_node) ++ return (u32)rt->rt6i_node->fn_sernum; ++ } ++#endif ++ return 0; ++} ++ ++static void nf_conn_rtcache_dst_set(int pf, ++ struct nf_conn_rtcache *rtc, ++ struct dst_entry *dst, ++ enum ip_conntrack_dir dir, int iif) ++{ ++ if (rtc->cached_dst[dir].iif != iif) ++ rtc->cached_dst[dir].iif = iif; ++ ++ if (rtc->cached_dst[dir].dst != dst) { ++ struct dst_entry *old; ++ ++ dst_hold(dst); ++ ++ old = xchg(&rtc->cached_dst[dir].dst, dst); ++ dst_release(old); ++ ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++ if (pf == NFPROTO_IPV6) ++ rtc->cached_dst[dir].cookie = ++ nf_rtcache_get_cookie(pf, dst); ++#endif ++ } ++} ++ ++static void nf_conn_rtcache_dst_obsolete(struct nf_conn_rtcache *rtc, ++ enum ip_conntrack_dir dir) ++{ ++ struct dst_entry *old; ++ ++ pr_debug("Invalidate iif %d for dir %d on cache %p\n", ++ rtc->cached_dst[dir].iif, dir, rtc); ++ ++ old = xchg(&rtc->cached_dst[dir].dst, NULL); ++ dst_release(old); ++ rtc->cached_dst[dir].iif = -1; ++} ++ ++static unsigned int nf_rtcache_in(const struct nf_hook_ops *ops, ++ struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ int (*okfn)(struct sk_buff *)) ++{ ++ struct nf_conn_rtcache *rtc; ++ enum ip_conntrack_info ctinfo; ++ enum ip_conntrack_dir dir; ++ struct dst_entry *dst; ++ struct nf_conn *ct; ++ int iif; ++ u32 cookie; ++ ++ if (skb_dst(skb) || skb->sk) ++ return NF_ACCEPT; ++ ++ ct = nf_ct_get(skb, &ctinfo); ++ if (!ct) ++ return NF_ACCEPT; ++ ++ rtc = nf_ct_rtcache_find_usable(ct); ++ if (!rtc) ++ return NF_ACCEPT; ++ ++ /* if iif changes, don't use cache and let ip stack ++ * do route lookup. ++ * ++ * If rp_filter is enabled it might toss skb, so ++ * we don't want to avoid these checks. ++ */ ++ dir = CTINFO2DIR(ctinfo); ++ iif = nf_conn_rtcache_iif_get(rtc, dir); ++ if (in->ifindex != iif) { ++ pr_debug("ct %p, iif %d, cached iif %d, skip cached entry\n", ++ ct, iif, in->ifindex); ++ return NF_ACCEPT; ++ } ++ dst = nf_conn_rtcache_dst_get(rtc, dir); ++ if (dst == NULL) ++ return NF_ACCEPT; ++ ++ cookie = nf_rtcache_get_cookie(ops->pf, dst); ++ ++ dst = dst_check(dst, cookie); ++ pr_debug("obtained dst %p for skb %p, cookie %d\n", dst, skb, cookie); ++ if (likely(dst)) ++ skb_dst_set_noref_force(skb, dst); ++ else ++ nf_conn_rtcache_dst_obsolete(rtc, dir); ++ ++ return NF_ACCEPT; ++} ++ ++static unsigned int nf_rtcache_forward(const struct nf_hook_ops *ops, ++ struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ int (*okfn)(struct sk_buff *)) ++{ ++ struct nf_conn_rtcache *rtc; ++ enum ip_conntrack_info ctinfo; ++ enum ip_conntrack_dir dir; ++ struct nf_conn *ct; ++ int iif; ++ ++ ct = nf_ct_get(skb, &ctinfo); ++ if (!ct) ++ return NF_ACCEPT; ++ ++ if (!nf_ct_is_confirmed(ct)) { ++ if (WARN_ON(nf_ct_rtcache_find(ct))) ++ return NF_ACCEPT; ++ nf_ct_rtcache_ext_add(ct); ++ return NF_ACCEPT; ++ } ++ ++ rtc = nf_ct_rtcache_find_usable(ct); ++ if (!rtc) ++ return NF_ACCEPT; ++ ++ dir = CTINFO2DIR(ctinfo); ++ iif = nf_conn_rtcache_iif_get(rtc, dir); ++ pr_debug("ct %p, skb %p, dir %d, iif %d, cached iif %d\n", ++ ct, skb, dir, iif, in->ifindex); ++ if (likely(in->ifindex == iif)) ++ return NF_ACCEPT; ++ ++ nf_conn_rtcache_dst_set(ops->pf, rtc, skb_dst(skb), dir, in->ifindex); ++ return NF_ACCEPT; ++} ++ ++static int nf_rtcache_dst_remove(struct nf_conn *ct, void *data) ++{ ++ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct); ++ struct net_device *dev = data; ++ ++ if (!rtc) ++ return 0; ++ ++ if (dev->ifindex == rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif || ++ dev->ifindex == rtc->cached_dst[IP_CT_DIR_REPLY].iif) { ++ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_ORIGINAL); ++ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_REPLY); ++ } ++ ++ return 0; ++} ++ ++static int nf_rtcache_netdev_event(struct notifier_block *this, ++ unsigned long event, void *ptr) ++{ ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ struct net *net = dev_net(dev); ++ ++ if (event == NETDEV_DOWN) ++ nf_ct_iterate_cleanup(net, nf_rtcache_dst_remove, dev, 0, 0); ++ ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block nf_rtcache_notifier = { ++ .notifier_call = nf_rtcache_netdev_event, ++}; ++ ++static struct nf_hook_ops rtcache_ops[] = { ++ { ++ .hook = nf_rtcache_in, ++ .owner = THIS_MODULE, ++ .pf = NFPROTO_IPV4, ++ .hooknum = NF_INET_PRE_ROUTING, ++ .priority = NF_IP_PRI_LAST, ++ }, ++ { ++ .hook = nf_rtcache_forward, ++ .owner = THIS_MODULE, ++ .pf = NFPROTO_IPV4, ++ .hooknum = NF_INET_FORWARD, ++ .priority = NF_IP_PRI_LAST, ++ }, ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++ { ++ .hook = nf_rtcache_in, ++ .owner = THIS_MODULE, ++ .pf = NFPROTO_IPV6, ++ .hooknum = NF_INET_PRE_ROUTING, ++ .priority = NF_IP_PRI_LAST, ++ }, ++ { ++ .hook = nf_rtcache_forward, ++ .owner = THIS_MODULE, ++ .pf = NFPROTO_IPV6, ++ .hooknum = NF_INET_FORWARD, ++ .priority = NF_IP_PRI_LAST, ++ }, ++#endif ++}; ++ ++static struct nf_ct_ext_type rtcache_extend __read_mostly = { ++ .len = sizeof(struct nf_conn_rtcache), ++ .align = __alignof__(struct nf_conn_rtcache), ++ .id = NF_CT_EXT_RTCACHE, ++ .destroy = nf_conn_rtcache_destroy, ++}; ++ ++static int __init nf_conntrack_rtcache_init(void) ++{ ++ int ret = nf_ct_extend_register(&rtcache_extend); ++ ++ if (ret < 0) { ++ pr_err("nf_conntrack_rtcache: Unable to register extension\n"); ++ return ret; ++ } ++ ++ ret = nf_register_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops)); ++ if (ret < 0) { ++ nf_ct_extend_unregister(&rtcache_extend); ++ return ret; ++ } ++ ++ ret = register_netdevice_notifier(&nf_rtcache_notifier); ++ if (ret) { ++ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops)); ++ nf_ct_extend_unregister(&rtcache_extend); ++ } ++ ++ return ret; ++} ++ ++static int nf_rtcache_ext_remove(struct nf_conn *ct, void *data) ++{ ++ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct); ++ ++ return rtc != NULL; ++} ++ ++static bool __exit nf_conntrack_rtcache_wait_for_dying(struct net *net) ++{ ++ bool wait = false; ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct nf_conntrack_tuple_hash *h; ++ struct hlist_nulls_node *n; ++ struct nf_conn *ct; ++ ++ rcu_read_lock(); ++ spin_lock_bh(&nf_conntrack_lock); ++ ++ hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) { ++ ct = nf_ct_tuplehash_to_ctrack(h); ++ if (nf_ct_rtcache_find(ct) != NULL) { ++ wait = true; ++ break; ++ } ++ } ++ spin_unlock_bh(&nf_conntrack_lock); ++ rcu_read_unlock(); ++ } ++ ++ return wait; ++} ++ ++static void __exit nf_conntrack_rtcache_fini(void) ++{ ++ struct net *net; ++ int count = 0; ++ ++ /* remove hooks so no new connections get rtcache extension */ ++ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops)); ++ ++ synchronize_net(); ++ ++ unregister_netdevice_notifier(&nf_rtcache_notifier); ++ ++ rtnl_lock(); ++ ++ /* zap all conntracks with rtcache extension */ ++ for_each_net(net) ++ nf_ct_iterate_cleanup(net, nf_rtcache_ext_remove, NULL, 0, 0); ++ ++ for_each_net(net) { ++ /* .. and make sure they're gone from dying list, too */ ++ while (nf_conntrack_rtcache_wait_for_dying(net)) { ++ msleep(200); ++ WARN_ONCE(++count > 25, "Waiting for all rtcache conntracks to go away\n"); ++ } ++ } ++ ++ rtnl_unlock(); ++ synchronize_net(); ++ nf_ct_extend_unregister(&rtcache_extend); ++} ++module_init(nf_conntrack_rtcache_init); ++module_exit(nf_conntrack_rtcache_fini); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Florian Westphal "); ++MODULE_DESCRIPTION("Conntrack route cache extension"); diff --git a/target/linux/generic/patches-3.14/250-netfilter_depends.patch b/target/linux/generic/patches-3.14/250-netfilter_depends.patch index 6edd9c6b02..e9239a7e1b 100644 --- a/target/linux/generic/patches-3.14/250-netfilter_depends.patch +++ b/target/linux/generic/patches-3.14/250-netfilter_depends.patch @@ -1,6 +1,6 @@ --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig -@@ -195,7 +195,6 @@ config NF_CONNTRACK_FTP +@@ -207,7 +207,6 @@ config NF_CONNTRACK_FTP config NF_CONNTRACK_H323 tristate "H.323 protocol support" @@ -8,7 +8,7 @@ depends on NETFILTER_ADVANCED help H.323 is a VoIP signalling protocol from ITU-T. As one of the most -@@ -871,7 +870,6 @@ config NETFILTER_XT_TARGET_SECMARK +@@ -883,7 +882,6 @@ config NETFILTER_XT_TARGET_SECMARK config NETFILTER_XT_TARGET_TCPMSS tristate '"TCPMSS" target support' diff --git a/target/linux/generic/patches-3.14/600-netfilter_layer7_2.22.patch b/target/linux/generic/patches-3.14/600-netfilter_layer7_2.22.patch index d57ee1f085..6d3db38eb4 100644 --- a/target/linux/generic/patches-3.14/600-netfilter_layer7_2.22.patch +++ b/target/linux/generic/patches-3.14/600-netfilter_layer7_2.22.patch @@ -1,6 +1,6 @@ --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig -@@ -1345,6 +1345,27 @@ config NETFILTER_XT_MATCH_STATE +@@ -1357,6 +1357,27 @@ config NETFILTER_XT_MATCH_STATE To compile it as a module, choose M here. If unsure, say N. @@ -30,7 +30,7 @@ depends on NETFILTER_ADVANCED --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile -@@ -158,6 +158,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) +@@ -161,6 +161,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o diff --git a/target/linux/generic/patches-3.14/603-netfilter_layer7_2.6.36_fix.patch b/target/linux/generic/patches-3.14/603-netfilter_layer7_2.6.36_fix.patch index d7e2ac22b4..85c6f0de40 100644 --- a/target/linux/generic/patches-3.14/603-netfilter_layer7_2.6.36_fix.patch +++ b/target/linux/generic/patches-3.14/603-netfilter_layer7_2.6.36_fix.patch @@ -1,6 +1,6 @@ --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig -@@ -1151,6 +1151,27 @@ config NETFILTER_XT_MATCH_L2TP +@@ -1163,6 +1163,27 @@ config NETFILTER_XT_MATCH_L2TP To compile it as a module, choose M here. If unsure, say N. @@ -28,7 +28,7 @@ config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' depends on NETFILTER_ADVANCED -@@ -1345,26 +1366,11 @@ config NETFILTER_XT_MATCH_STATE +@@ -1357,26 +1378,11 @@ config NETFILTER_XT_MATCH_STATE To compile it as a module, choose M here. If unsure, say N. diff --git a/target/linux/generic/patches-3.14/615-netfilter_add_xt_id_match.patch b/target/linux/generic/patches-3.14/615-netfilter_add_xt_id_match.patch index c91f035a12..07b4bfae6a 100644 --- a/target/linux/generic/patches-3.14/615-netfilter_add_xt_id_match.patch +++ b/target/linux/generic/patches-3.14/615-netfilter_add_xt_id_match.patch @@ -21,7 +21,7 @@ +#endif /* XT_ID_H */ --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig -@@ -1121,6 +1121,13 @@ config NETFILTER_XT_MATCH_IPCOMP +@@ -1133,6 +1133,13 @@ config NETFILTER_XT_MATCH_IPCOMP To compile it as a module, choose M here. If unsure, say N. @@ -37,7 +37,7 @@ depends on NETFILTER_ADVANCED --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile -@@ -136,6 +136,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += +@@ -139,6 +139,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o diff --git a/target/linux/generic/patches-3.18/050-backport_netfilter_rtcache.patch b/target/linux/generic/patches-3.18/050-backport_netfilter_rtcache.patch new file mode 100644 index 0000000000..61a1411e4e --- /dev/null +++ b/target/linux/generic/patches-3.18/050-backport_netfilter_rtcache.patch @@ -0,0 +1,505 @@ +Subject: netfilter: conntrack: cache route for forwarded connections + +... to avoid per-packet FIB lookup if possible. + +The cached dst is re-used provided the input interface +is the same as that of the previous packet in the same direction. + +If not, the cached dst is invalidated. + +For ipv6 we also need to store sernum, else dst_check doesn't work, +pointed out by Eric Dumazet. + +This should speed up forwarding when conntrack is already in use +anyway, especially when using reverse path filtering -- active RPF +enforces two FIB lookups for each packet. + +Before the routing cache removal this didn't matter since RPF was performed +only when route cache didn't yield a result; but without route cache it +comes at higher price. + +Julian Anastasov suggested to add NETDEV_UNREGISTER handler to +avoid holding on to dsts of 'frozen' conntracks. + +Signed-off-by: Florian Westphal + +--- a/include/net/netfilter/nf_conntrack_extend.h ++++ b/include/net/netfilter/nf_conntrack_extend.h +@@ -30,6 +30,9 @@ enum nf_ct_ext_id { + #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + NF_CT_EXT_SYNPROXY, + #endif ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE) ++ NF_CT_EXT_RTCACHE, ++#endif + NF_CT_EXT_NUM, + }; + +@@ -43,6 +46,7 @@ enum nf_ct_ext_id { + #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout + #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels + #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy ++#define NF_CT_EXT_RTCACHE_TYPE struct nf_conn_rtcache + + /* Extensions: optional stuff which isn't permanently in struct. */ + struct nf_ct_ext { +--- /dev/null ++++ b/include/net/netfilter/nf_conntrack_rtcache.h +@@ -0,0 +1,34 @@ ++#include ++#include ++#include ++ ++struct dst_entry; ++ ++struct nf_conn_dst_cache { ++ struct dst_entry *dst; ++ int iif; ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++ u32 cookie; ++#endif ++ ++}; ++ ++struct nf_conn_rtcache { ++ struct nf_conn_dst_cache cached_dst[IP_CT_DIR_MAX]; ++}; ++ ++static inline ++struct nf_conn_rtcache *nf_ct_rtcache_find(const struct nf_conn *ct) ++{ ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE) ++ return nf_ct_ext_find(ct, NF_CT_EXT_RTCACHE); ++#else ++ return NULL; ++#endif ++} ++ ++static inline int nf_conn_rtcache_iif_get(const struct nf_conn_rtcache *rtc, ++ enum ip_conntrack_dir dir) ++{ ++ return rtc->cached_dst[dir].iif; ++} +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -106,6 +106,18 @@ config NF_CONNTRACK_EVENTS + + If unsure, say `N'. + ++config NF_CONNTRACK_RTCACHE ++ tristate "Cache route entries in conntrack objects" ++ depends on NETFILTER_ADVANCED ++ depends on NF_CONNTRACK ++ help ++ If this option is enabled, the connection tracking code will ++ cache routing information for each connection that is being ++ forwarded, at a cost of 32 bytes per conntrack object. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ The module will be called nf_conntrack_rtcache. ++ + config NF_CONNTRACK_TIMEOUT + bool 'Connection tracking timeout' + depends on NETFILTER_ADVANCED +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -18,6 +18,9 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += n + # connection tracking + obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o + ++# optional conntrack route cache extension ++obj-$(CONFIG_NF_CONNTRACK_RTCACHE) += nf_conntrack_rtcache.o ++ + # SCTP protocol connection tracking + obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o + obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o +--- /dev/null ++++ b/net/netfilter/nf_conntrack_rtcache.c +@@ -0,0 +1,387 @@ ++/* route cache for netfilter. ++ * ++ * (C) 2014 Red Hat GmbH ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++#include ++#endif ++ ++static void __nf_conn_rtcache_destroy(struct nf_conn_rtcache *rtc, ++ enum ip_conntrack_dir dir) ++{ ++ struct dst_entry *dst = rtc->cached_dst[dir].dst; ++ ++ dst_release(dst); ++} ++ ++static void nf_conn_rtcache_destroy(struct nf_conn *ct) ++{ ++ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct); ++ ++ if (!rtc) ++ return; ++ ++ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_ORIGINAL); ++ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_REPLY); ++} ++ ++static void nf_ct_rtcache_ext_add(struct nf_conn *ct) ++{ ++ struct nf_conn_rtcache *rtc; ++ ++ rtc = nf_ct_ext_add(ct, NF_CT_EXT_RTCACHE, GFP_ATOMIC); ++ if (rtc) { ++ rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif = -1; ++ rtc->cached_dst[IP_CT_DIR_ORIGINAL].dst = NULL; ++ rtc->cached_dst[IP_CT_DIR_REPLY].iif = -1; ++ rtc->cached_dst[IP_CT_DIR_REPLY].dst = NULL; ++ } ++} ++ ++static struct nf_conn_rtcache *nf_ct_rtcache_find_usable(struct nf_conn *ct) ++{ ++ if (nf_ct_is_untracked(ct)) ++ return NULL; ++ return nf_ct_rtcache_find(ct); ++} ++ ++static struct dst_entry * ++nf_conn_rtcache_dst_get(const struct nf_conn_rtcache *rtc, ++ enum ip_conntrack_dir dir) ++{ ++ return rtc->cached_dst[dir].dst; ++} ++ ++static u32 nf_rtcache_get_cookie(int pf, const struct dst_entry *dst) ++{ ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++ if (pf == NFPROTO_IPV6) { ++ const struct rt6_info *rt = (const struct rt6_info *)dst; ++ ++ if (rt->rt6i_node) ++ return (u32)rt->rt6i_node->fn_sernum; ++ } ++#endif ++ return 0; ++} ++ ++static void nf_conn_rtcache_dst_set(int pf, ++ struct nf_conn_rtcache *rtc, ++ struct dst_entry *dst, ++ enum ip_conntrack_dir dir, int iif) ++{ ++ if (rtc->cached_dst[dir].iif != iif) ++ rtc->cached_dst[dir].iif = iif; ++ ++ if (rtc->cached_dst[dir].dst != dst) { ++ struct dst_entry *old; ++ ++ dst_hold(dst); ++ ++ old = xchg(&rtc->cached_dst[dir].dst, dst); ++ dst_release(old); ++ ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++ if (pf == NFPROTO_IPV6) ++ rtc->cached_dst[dir].cookie = ++ nf_rtcache_get_cookie(pf, dst); ++#endif ++ } ++} ++ ++static void nf_conn_rtcache_dst_obsolete(struct nf_conn_rtcache *rtc, ++ enum ip_conntrack_dir dir) ++{ ++ struct dst_entry *old; ++ ++ pr_debug("Invalidate iif %d for dir %d on cache %p\n", ++ rtc->cached_dst[dir].iif, dir, rtc); ++ ++ old = xchg(&rtc->cached_dst[dir].dst, NULL); ++ dst_release(old); ++ rtc->cached_dst[dir].iif = -1; ++} ++ ++static unsigned int nf_rtcache_in(const struct nf_hook_ops *ops, ++ struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ int (*okfn)(struct sk_buff *)) ++{ ++ struct nf_conn_rtcache *rtc; ++ enum ip_conntrack_info ctinfo; ++ enum ip_conntrack_dir dir; ++ struct dst_entry *dst; ++ struct nf_conn *ct; ++ int iif; ++ u32 cookie; ++ ++ if (skb_dst(skb) || skb->sk) ++ return NF_ACCEPT; ++ ++ ct = nf_ct_get(skb, &ctinfo); ++ if (!ct) ++ return NF_ACCEPT; ++ ++ rtc = nf_ct_rtcache_find_usable(ct); ++ if (!rtc) ++ return NF_ACCEPT; ++ ++ /* if iif changes, don't use cache and let ip stack ++ * do route lookup. ++ * ++ * If rp_filter is enabled it might toss skb, so ++ * we don't want to avoid these checks. ++ */ ++ dir = CTINFO2DIR(ctinfo); ++ iif = nf_conn_rtcache_iif_get(rtc, dir); ++ if (in->ifindex != iif) { ++ pr_debug("ct %p, iif %d, cached iif %d, skip cached entry\n", ++ ct, iif, in->ifindex); ++ return NF_ACCEPT; ++ } ++ dst = nf_conn_rtcache_dst_get(rtc, dir); ++ if (dst == NULL) ++ return NF_ACCEPT; ++ ++ cookie = nf_rtcache_get_cookie(ops->pf, dst); ++ ++ dst = dst_check(dst, cookie); ++ pr_debug("obtained dst %p for skb %p, cookie %d\n", dst, skb, cookie); ++ if (likely(dst)) ++ skb_dst_set_noref_force(skb, dst); ++ else ++ nf_conn_rtcache_dst_obsolete(rtc, dir); ++ ++ return NF_ACCEPT; ++} ++ ++static unsigned int nf_rtcache_forward(const struct nf_hook_ops *ops, ++ struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ int (*okfn)(struct sk_buff *)) ++{ ++ struct nf_conn_rtcache *rtc; ++ enum ip_conntrack_info ctinfo; ++ enum ip_conntrack_dir dir; ++ struct nf_conn *ct; ++ int iif; ++ ++ ct = nf_ct_get(skb, &ctinfo); ++ if (!ct) ++ return NF_ACCEPT; ++ ++ if (!nf_ct_is_confirmed(ct)) { ++ if (WARN_ON(nf_ct_rtcache_find(ct))) ++ return NF_ACCEPT; ++ nf_ct_rtcache_ext_add(ct); ++ return NF_ACCEPT; ++ } ++ ++ rtc = nf_ct_rtcache_find_usable(ct); ++ if (!rtc) ++ return NF_ACCEPT; ++ ++ dir = CTINFO2DIR(ctinfo); ++ iif = nf_conn_rtcache_iif_get(rtc, dir); ++ pr_debug("ct %p, skb %p, dir %d, iif %d, cached iif %d\n", ++ ct, skb, dir, iif, in->ifindex); ++ if (likely(in->ifindex == iif)) ++ return NF_ACCEPT; ++ ++ nf_conn_rtcache_dst_set(ops->pf, rtc, skb_dst(skb), dir, in->ifindex); ++ return NF_ACCEPT; ++} ++ ++static int nf_rtcache_dst_remove(struct nf_conn *ct, void *data) ++{ ++ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct); ++ struct net_device *dev = data; ++ ++ if (!rtc) ++ return 0; ++ ++ if (dev->ifindex == rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif || ++ dev->ifindex == rtc->cached_dst[IP_CT_DIR_REPLY].iif) { ++ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_ORIGINAL); ++ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_REPLY); ++ } ++ ++ return 0; ++} ++ ++static int nf_rtcache_netdev_event(struct notifier_block *this, ++ unsigned long event, void *ptr) ++{ ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ struct net *net = dev_net(dev); ++ ++ if (event == NETDEV_DOWN) ++ nf_ct_iterate_cleanup(net, nf_rtcache_dst_remove, dev, 0, 0); ++ ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block nf_rtcache_notifier = { ++ .notifier_call = nf_rtcache_netdev_event, ++}; ++ ++static struct nf_hook_ops rtcache_ops[] = { ++ { ++ .hook = nf_rtcache_in, ++ .owner = THIS_MODULE, ++ .pf = NFPROTO_IPV4, ++ .hooknum = NF_INET_PRE_ROUTING, ++ .priority = NF_IP_PRI_LAST, ++ }, ++ { ++ .hook = nf_rtcache_forward, ++ .owner = THIS_MODULE, ++ .pf = NFPROTO_IPV4, ++ .hooknum = NF_INET_FORWARD, ++ .priority = NF_IP_PRI_LAST, ++ }, ++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) ++ { ++ .hook = nf_rtcache_in, ++ .owner = THIS_MODULE, ++ .pf = NFPROTO_IPV6, ++ .hooknum = NF_INET_PRE_ROUTING, ++ .priority = NF_IP_PRI_LAST, ++ }, ++ { ++ .hook = nf_rtcache_forward, ++ .owner = THIS_MODULE, ++ .pf = NFPROTO_IPV6, ++ .hooknum = NF_INET_FORWARD, ++ .priority = NF_IP_PRI_LAST, ++ }, ++#endif ++}; ++ ++static struct nf_ct_ext_type rtcache_extend __read_mostly = { ++ .len = sizeof(struct nf_conn_rtcache), ++ .align = __alignof__(struct nf_conn_rtcache), ++ .id = NF_CT_EXT_RTCACHE, ++ .destroy = nf_conn_rtcache_destroy, ++}; ++ ++static int __init nf_conntrack_rtcache_init(void) ++{ ++ int ret = nf_ct_extend_register(&rtcache_extend); ++ ++ if (ret < 0) { ++ pr_err("nf_conntrack_rtcache: Unable to register extension\n"); ++ return ret; ++ } ++ ++ ret = nf_register_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops)); ++ if (ret < 0) { ++ nf_ct_extend_unregister(&rtcache_extend); ++ return ret; ++ } ++ ++ ret = register_netdevice_notifier(&nf_rtcache_notifier); ++ if (ret) { ++ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops)); ++ nf_ct_extend_unregister(&rtcache_extend); ++ } ++ ++ return ret; ++} ++ ++static int nf_rtcache_ext_remove(struct nf_conn *ct, void *data) ++{ ++ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct); ++ ++ return rtc != NULL; ++} ++ ++static bool __exit nf_conntrack_rtcache_wait_for_dying(struct net *net) ++{ ++ bool wait = false; ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct nf_conntrack_tuple_hash *h; ++ struct hlist_nulls_node *n; ++ struct nf_conn *ct; ++ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); ++ ++ rcu_read_lock(); ++ spin_lock_bh(&pcpu->lock); ++ ++ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { ++ ct = nf_ct_tuplehash_to_ctrack(h); ++ if (nf_ct_rtcache_find(ct) != NULL) { ++ wait = true; ++ break; ++ } ++ } ++ spin_unlock_bh(&pcpu->lock); ++ rcu_read_unlock(); ++ } ++ ++ return wait; ++} ++ ++static void __exit nf_conntrack_rtcache_fini(void) ++{ ++ struct net *net; ++ int count = 0; ++ ++ /* remove hooks so no new connections get rtcache extension */ ++ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops)); ++ ++ synchronize_net(); ++ ++ unregister_netdevice_notifier(&nf_rtcache_notifier); ++ ++ rtnl_lock(); ++ ++ /* zap all conntracks with rtcache extension */ ++ for_each_net(net) ++ nf_ct_iterate_cleanup(net, nf_rtcache_ext_remove, NULL, 0, 0); ++ ++ for_each_net(net) { ++ /* .. and make sure they're gone from dying list, too */ ++ while (nf_conntrack_rtcache_wait_for_dying(net)) { ++ msleep(200); ++ WARN_ONCE(++count > 25, "Waiting for all rtcache conntracks to go away\n"); ++ } ++ } ++ ++ rtnl_unlock(); ++ synchronize_net(); ++ nf_ct_extend_unregister(&rtcache_extend); ++} ++module_init(nf_conntrack_rtcache_init); ++module_exit(nf_conntrack_rtcache_fini); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Florian Westphal "); ++MODULE_DESCRIPTION("Conntrack route cache extension"); diff --git a/target/linux/generic/patches-3.18/250-netfilter_depends.patch b/target/linux/generic/patches-3.18/250-netfilter_depends.patch index af59b021c8..47be4a0197 100644 --- a/target/linux/generic/patches-3.18/250-netfilter_depends.patch +++ b/target/linux/generic/patches-3.18/250-netfilter_depends.patch @@ -1,6 +1,6 @@ --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig -@@ -198,7 +198,6 @@ config NF_CONNTRACK_FTP +@@ -210,7 +210,6 @@ config NF_CONNTRACK_FTP config NF_CONNTRACK_H323 tristate "H.323 protocol support" @@ -8,7 +8,7 @@ depends on NETFILTER_ADVANCED help H.323 is a VoIP signalling protocol from ITU-T. As one of the most -@@ -895,7 +894,6 @@ config NETFILTER_XT_TARGET_SECMARK +@@ -907,7 +906,6 @@ config NETFILTER_XT_TARGET_SECMARK config NETFILTER_XT_TARGET_TCPMSS tristate '"TCPMSS" target support' diff --git a/target/linux/generic/patches-3.18/600-netfilter_layer7_2.22.patch b/target/linux/generic/patches-3.18/600-netfilter_layer7_2.22.patch index 4b0a364c3f..bdf6c88ef7 100644 --- a/target/linux/generic/patches-3.18/600-netfilter_layer7_2.22.patch +++ b/target/linux/generic/patches-3.18/600-netfilter_layer7_2.22.patch @@ -1,6 +1,6 @@ --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig -@@ -1369,6 +1369,27 @@ config NETFILTER_XT_MATCH_STATE +@@ -1381,6 +1381,27 @@ config NETFILTER_XT_MATCH_STATE To compile it as a module, choose M here. If unsure, say N. @@ -30,7 +30,7 @@ depends on NETFILTER_ADVANCED --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile -@@ -162,6 +162,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) +@@ -165,6 +165,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o diff --git a/target/linux/generic/patches-3.18/603-netfilter_layer7_2.6.36_fix.patch b/target/linux/generic/patches-3.18/603-netfilter_layer7_2.6.36_fix.patch index dcd5b7e859..024e37f9bd 100644 --- a/target/linux/generic/patches-3.18/603-netfilter_layer7_2.6.36_fix.patch +++ b/target/linux/generic/patches-3.18/603-netfilter_layer7_2.6.36_fix.patch @@ -1,6 +1,6 @@ --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig -@@ -1175,6 +1175,27 @@ config NETFILTER_XT_MATCH_L2TP +@@ -1187,6 +1187,27 @@ config NETFILTER_XT_MATCH_L2TP To compile it as a module, choose M here. If unsure, say N. @@ -28,7 +28,7 @@ config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' depends on NETFILTER_ADVANCED -@@ -1369,26 +1390,11 @@ config NETFILTER_XT_MATCH_STATE +@@ -1381,26 +1402,11 @@ config NETFILTER_XT_MATCH_STATE To compile it as a module, choose M here. If unsure, say N. diff --git a/target/linux/generic/patches-3.18/615-netfilter_add_xt_id_match.patch b/target/linux/generic/patches-3.18/615-netfilter_add_xt_id_match.patch index 265ff207b6..cbb9b2b8bd 100644 --- a/target/linux/generic/patches-3.18/615-netfilter_add_xt_id_match.patch +++ b/target/linux/generic/patches-3.18/615-netfilter_add_xt_id_match.patch @@ -21,7 +21,7 @@ +#endif /* XT_ID_H */ --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig -@@ -1145,6 +1145,13 @@ config NETFILTER_XT_MATCH_IPCOMP +@@ -1157,6 +1157,13 @@ config NETFILTER_XT_MATCH_IPCOMP To compile it as a module, choose M here. If unsure, say N. @@ -37,7 +37,7 @@ depends on NETFILTER_ADVANCED --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile -@@ -140,6 +140,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += +@@ -143,6 +143,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o