netfilter: nat: merge nf_nat_ipv4,6 into nat core
author Florian Westphal <fw@strlen.de>
Tue, 19 Feb 2019 16:38:21 +0000 (17:38 +0100)
committer Pablo Neira Ayuso <pablo@netfilter.org>
Wed, 27 Feb 2019 09:49:55 +0000 (10:49 +0100)
before:
   text    data     bss     dec     hex filename
  16566    1576    4136   22278    5706 nf_nat.ko
   3598     844       0    4442    115a nf_nat_ipv6.ko
   3187     844       0    4031     fbf nf_nat_ipv4.ko

after:
   text    data     bss     dec     hex filename
  22948    1612    4136   28696    7018 nf_nat.ko

... with ipv4/v6 nat now provided directly via nf_nat.ko.

Also changes:
       ret = nf_nat_ipv4_fn(priv, skb, state);
       if (ret != NF_DROP && ret != NF_STOLEN &&
into
       if (ret != NF_ACCEPT)
               return ret;

everywhere.

The nat hooks should never return anything other than
ACCEPT or DROP (and the latter only in rare error cases).

The original code uses multi-line ANDing including assignment-in-if:
        if (ret != NF_DROP && ret != NF_STOLEN &&
           !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
            (ct = nf_ct_get(skb, &ctinfo)) != NULL) {

I removed this while moving the code, breaking those up into separate
conditionals and moving the assignments onto their own lines.

checkpatch still generates some warnings:
 1. Overly long lines (of moved code).
    Breaking them is even uglier, so I kept them as-is.
 2. use of extern function declarations in a .c file.
    This is a necessary evil: we must call
    nf_nat_l3proto_register() from the nat core now.
    All l3proto related functions are removed later in this series,
    those prototypes are then removed as well.

v2: keep empty nf_nat_ipv6_csum_update stub for CONFIG_IPV6=n case.
v3: remove IS_ENABLED(NF_NAT_IPV4/6) tests, NF_NAT_IPVx toggles
    are removed here.
v4: also get rid of the assignments in conditionals.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
12 files changed:
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/nf_nat_l3proto_ipv4.c [deleted file]
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/Makefile
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c [deleted file]
net/netfilter/Kconfig
net/netfilter/nf_nat_core.c
net/netfilter/nf_nat_proto.c
net/openvswitch/Kconfig
net/openvswitch/conntrack.c
tools/testing/selftests/net/config

index db05a835748ab7f70eb83d772e4e9e894acaa41c..8688461ed07753e53702f9eeee267ad6122f414e 100644 (file)
@@ -94,17 +94,7 @@ config NF_REJECT_IPV4
        tristate "IPv4 packet rejection"
        default m if NETFILTER_ADVANCED=n
 
-config NF_NAT_IPV4
-       tristate "IPv4 NAT"
-       depends on NF_CONNTRACK
-       default m if NETFILTER_ADVANCED=n
-       select NF_NAT
-       help
-         The IPv4 NAT option allows masquerading, port forwarding and other
-         forms of full Network Address Port Translation. This can be
-         controlled by iptables or nft.
-
-if NF_NAT_IPV4
+if NF_NAT
 
 if NF_TABLES
 config NFT_CHAIN_NAT_IPV4
@@ -163,7 +153,7 @@ config NF_NAT_H323
        depends on NF_CONNTRACK
        default NF_CONNTRACK_H323
 
-endif # NF_NAT_IPV4
+endif # NF_NAT
 
 config IP_NF_IPTABLES
        tristate "IP tables support (required for filtering/masq/NAT)"
@@ -260,7 +250,6 @@ config IP_NF_NAT
        depends on NF_CONNTRACK
        default m if NETFILTER_ADVANCED=n
        select NF_NAT
-       select NF_NAT_IPV4
        select NETFILTER_XT_NAT
        help
          This enables the `nat' table in iptables. This allows masquerading,
index ddeb35ab8bdb630da04f0526d3f55eed63efaaea..b2cdf705fdf18bc74cced1a07c4ae06764a18138 100644 (file)
@@ -3,9 +3,6 @@
 # Makefile for the netfilter modules on top of IPv4.
 #
 
-nf_nat_ipv4-y          := nf_nat_l3proto_ipv4.o
-obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
-
 # defrag
 obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
 
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
deleted file mode 100644 (file)
index 36b4f96..0000000
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/secure_seq.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <net/ip.h>
-
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
-
-static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
-                                 unsigned int iphdroff,
-                                 const struct nf_conntrack_tuple *target,
-                                 enum nf_nat_manip_type maniptype)
-{
-       struct iphdr *iph;
-       unsigned int hdroff;
-
-       if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
-               return false;
-
-       iph = (void *)skb->data + iphdroff;
-       hdroff = iphdroff + iph->ihl * 4;
-
-       if (!nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff,
-                                     hdroff, target, maniptype))
-               return false;
-       iph = (void *)skb->data + iphdroff;
-
-       if (maniptype == NF_NAT_MANIP_SRC) {
-               csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
-               iph->saddr = target->src.u3.ip;
-       } else {
-               csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
-               iph->daddr = target->dst.u3.ip;
-       }
-       return true;
-}
-
-static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
-                                   unsigned int iphdroff, __sum16 *check,
-                                   const struct nf_conntrack_tuple *t,
-                                   enum nf_nat_manip_type maniptype)
-{
-       struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
-       __be32 oldip, newip;
-
-       if (maniptype == NF_NAT_MANIP_SRC) {
-               oldip = iph->saddr;
-               newip = t->src.u3.ip;
-       } else {
-               oldip = iph->daddr;
-               newip = t->dst.u3.ip;
-       }
-       inet_proto_csum_replace4(check, skb, oldip, newip, true);
-}
-
-static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
-                                   u8 proto, void *data, __sum16 *check,
-                                   int datalen, int oldlen)
-{
-       if (skb->ip_summed != CHECKSUM_PARTIAL) {
-               const struct iphdr *iph = ip_hdr(skb);
-
-               skb->ip_summed = CHECKSUM_PARTIAL;
-               skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
-                       ip_hdrlen(skb);
-               skb->csum_offset = (void *)check - data;
-               *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
-                                           proto, 0);
-       } else
-               inet_proto_csum_replace2(check, skb,
-                                        htons(oldlen), htons(datalen), true);
-}
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
-       .l3proto                = NFPROTO_IPV4,
-       .manip_pkt              = nf_nat_ipv4_manip_pkt,
-       .csum_update            = nf_nat_ipv4_csum_update,
-       .csum_recalc            = nf_nat_ipv4_csum_recalc,
-};
-
-int nf_nat_icmp_reply_translation(struct sk_buff *skb,
-                                 struct nf_conn *ct,
-                                 enum ip_conntrack_info ctinfo,
-                                 unsigned int hooknum)
-{
-       struct {
-               struct icmphdr  icmp;
-               struct iphdr    ip;
-       } *inside;
-       enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-       enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
-       unsigned int hdrlen = ip_hdrlen(skb);
-       struct nf_conntrack_tuple target;
-       unsigned long statusbit;
-
-       WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
-
-       if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
-               return 0;
-       if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
-               return 0;
-
-       inside = (void *)skb->data + hdrlen;
-       if (inside->icmp.type == ICMP_REDIRECT) {
-               if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
-                       return 0;
-               if (ct->status & IPS_NAT_MASK)
-                       return 0;
-       }
-
-       if (manip == NF_NAT_MANIP_SRC)
-               statusbit = IPS_SRC_NAT;
-       else
-               statusbit = IPS_DST_NAT;
-
-       /* Invert if this is reply direction */
-       if (dir == IP_CT_DIR_REPLY)
-               statusbit ^= IPS_NAT_MASK;
-
-       if (!(ct->status & statusbit))
-               return 1;
-
-       if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
-                                  &ct->tuplehash[!dir].tuple, !manip))
-               return 0;
-
-       if (skb->ip_summed != CHECKSUM_PARTIAL) {
-               /* Reloading "inside" here since manip_pkt may reallocate */
-               inside = (void *)skb->data + hdrlen;
-               inside->icmp.checksum = 0;
-               inside->icmp.checksum =
-                       csum_fold(skb_checksum(skb, hdrlen,
-                                              skb->len - hdrlen, 0));
-       }
-
-       /* Change outer to look like the reply to an incoming packet */
-       nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
-       target.dst.protonum = IPPROTO_ICMP;
-       if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
-               return 0;
-
-       return 1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
-
-static unsigned int
-nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
-              const struct nf_hook_state *state)
-{
-       struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct)
-               return NF_ACCEPT;
-
-       if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
-               if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
-                       if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
-                                                          state->hook))
-                               return NF_DROP;
-                       else
-                               return NF_ACCEPT;
-               }
-       }
-
-       return nf_nat_inet_fn(priv, skb, state);
-}
-
-static unsigned int
-nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
-              const struct nf_hook_state *state)
-{
-       unsigned int ret;
-       __be32 daddr = ip_hdr(skb)->daddr;
-
-       ret = nf_nat_ipv4_fn(priv, skb, state);
-       if (ret != NF_DROP && ret != NF_STOLEN &&
-           daddr != ip_hdr(skb)->daddr)
-               skb_dst_drop(skb);
-
-       return ret;
-}
-
-static unsigned int
-nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
-               const struct nf_hook_state *state)
-{
-#ifdef CONFIG_XFRM
-       const struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-       int err;
-#endif
-       unsigned int ret;
-
-       ret = nf_nat_ipv4_fn(priv, skb, state);
-#ifdef CONFIG_XFRM
-       if (ret != NF_DROP && ret != NF_STOLEN &&
-           !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
-           (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-               enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-               if ((ct->tuplehash[dir].tuple.src.u3.ip !=
-                    ct->tuplehash[!dir].tuple.dst.u3.ip) ||
-                   (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
-                    ct->tuplehash[dir].tuple.src.u.all !=
-                    ct->tuplehash[!dir].tuple.dst.u.all)) {
-                       err = nf_xfrm_me_harder(state->net, skb, AF_INET);
-                       if (err < 0)
-                               ret = NF_DROP_ERR(err);
-               }
-       }
-#endif
-       return ret;
-}
-
-static unsigned int
-nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
-                    const struct nf_hook_state *state)
-{
-       const struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-       unsigned int ret;
-       int err;
-
-       ret = nf_nat_ipv4_fn(priv, skb, state);
-       if (ret != NF_DROP && ret != NF_STOLEN &&
-           (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-               enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-               if (ct->tuplehash[dir].tuple.dst.u3.ip !=
-                   ct->tuplehash[!dir].tuple.src.u3.ip) {
-                       err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
-                       if (err < 0)
-                               ret = NF_DROP_ERR(err);
-               }
-#ifdef CONFIG_XFRM
-               else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
-                        ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
-                        ct->tuplehash[dir].tuple.dst.u.all !=
-                        ct->tuplehash[!dir].tuple.src.u.all) {
-                       err = nf_xfrm_me_harder(state->net, skb, AF_INET);
-                       if (err < 0)
-                               ret = NF_DROP_ERR(err);
-               }
-#endif
-       }
-       return ret;
-}
-
-static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
-       /* Before packet filtering, change destination */
-       {
-               .hook           = nf_nat_ipv4_in,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_PRE_ROUTING,
-               .priority       = NF_IP_PRI_NAT_DST,
-       },
-       /* After packet filtering, change source */
-       {
-               .hook           = nf_nat_ipv4_out,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_POST_ROUTING,
-               .priority       = NF_IP_PRI_NAT_SRC,
-       },
-       /* Before packet filtering, change destination */
-       {
-               .hook           = nf_nat_ipv4_local_fn,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_LOCAL_OUT,
-               .priority       = NF_IP_PRI_NAT_DST,
-       },
-       /* After packet filtering, change source */
-       {
-               .hook           = nf_nat_ipv4_fn,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_LOCAL_IN,
-               .priority       = NF_IP_PRI_NAT_SRC,
-       },
-};
-
-int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
-{
-       return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_register_fn);
-
-void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
-{
-       nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
-
-static int __init nf_nat_l3proto_ipv4_init(void)
-{
-       return nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
-}
-
-static void __exit nf_nat_l3proto_ipv4_exit(void)
-{
-       nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
-}
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("nf-nat-" __stringify(AF_INET));
-
-module_init(nf_nat_l3proto_ipv4_init);
-module_exit(nf_nat_l3proto_ipv4_exit);
index f57fc99e9a041fda54c283d26c2b78554b4acb75..a04a38166d8cc98c7872934f2312f4f98aafbbc7 100644 (file)
@@ -31,7 +31,7 @@ config NFT_CHAIN_ROUTE_IPV6
          fields such as the source, destination, flowlabel, hop-limit and
          the packet mark.
 
-if NF_NAT_IPV6
+if NF_NAT
 
 config NFT_CHAIN_NAT_IPV6
        tristate "IPv6 nf_tables nat chain support"
@@ -57,7 +57,7 @@ config NFT_REDIR_IPV6
          This is the expression that provides IPv4 redirect support for
          nf_tables.
 
-endif # NF_NAT_IPV6
+endif # NF_NAT
 
 config NFT_REJECT_IPV6
        select NF_REJECT_IPV6
@@ -106,16 +106,6 @@ config NF_LOG_IPV6
        default m if NETFILTER_ADVANCED=n
        select NF_LOG_COMMON
 
-config NF_NAT_IPV6
-       tristate "IPv6 NAT"
-       depends on NF_CONNTRACK
-       depends on NETFILTER_ADVANCED
-       select NF_NAT
-       help
-         The IPv6 NAT option allows masquerading, port forwarding and other
-         forms of full Network Address Port Translation. This can be
-         controlled by iptables or nft.
-
 config IP6_NF_IPTABLES
        tristate "IP6 tables support (required for filtering)"
        depends on INET && IPV6
@@ -304,7 +294,6 @@ config IP6_NF_NAT
        depends on NF_CONNTRACK
        depends on NETFILTER_ADVANCED
        select NF_NAT
-       select NF_NAT_IPV6
        select NETFILTER_XT_NAT
        help
          This enables the `nat' table in ip6tables. This allows masquerading,
index a7b18d13e056e0eba0eba3e628a8e415b2d09e8c..afb880427133d504eeac58a5fb386bb09bfb0785 100644 (file)
@@ -11,9 +11,6 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
 
-nf_nat_ipv6-y          := nf_nat_l3proto_ipv6.o
-obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
-
 # defrag
 nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
deleted file mode 100644 (file)
index 5d667cf..0000000
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of IPv6 NAT funded by Astaro.
- */
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/secure_seq.h>
-#include <net/checksum.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_route.h>
-#include <net/xfrm.h>
-#include <net/ipv6.h>
-
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
-
-static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
-                                 unsigned int iphdroff,
-                                 const struct nf_conntrack_tuple *target,
-                                 enum nf_nat_manip_type maniptype)
-{
-       struct ipv6hdr *ipv6h;
-       __be16 frag_off;
-       int hdroff;
-       u8 nexthdr;
-
-       if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
-               return false;
-
-       ipv6h = (void *)skb->data + iphdroff;
-       nexthdr = ipv6h->nexthdr;
-       hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
-                                 &nexthdr, &frag_off);
-       if (hdroff < 0)
-               goto manip_addr;
-
-       if ((frag_off & htons(~0x7)) == 0 &&
-           !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
-                                     target, maniptype))
-               return false;
-
-       /* must reload, offset might have changed */
-       ipv6h = (void *)skb->data + iphdroff;
-
-manip_addr:
-       if (maniptype == NF_NAT_MANIP_SRC)
-               ipv6h->saddr = target->src.u3.in6;
-       else
-               ipv6h->daddr = target->dst.u3.in6;
-
-       return true;
-}
-
-static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
-                                   unsigned int iphdroff, __sum16 *check,
-                                   const struct nf_conntrack_tuple *t,
-                                   enum nf_nat_manip_type maniptype)
-{
-       const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
-       const struct in6_addr *oldip, *newip;
-
-       if (maniptype == NF_NAT_MANIP_SRC) {
-               oldip = &ipv6h->saddr;
-               newip = &t->src.u3.in6;
-       } else {
-               oldip = &ipv6h->daddr;
-               newip = &t->dst.u3.in6;
-       }
-       inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
-                                 newip->s6_addr32, true);
-}
-
-static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
-                                   u8 proto, void *data, __sum16 *check,
-                                   int datalen, int oldlen)
-{
-       if (skb->ip_summed != CHECKSUM_PARTIAL) {
-               const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-
-               skb->ip_summed = CHECKSUM_PARTIAL;
-               skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
-                       (data - (void *)skb->data);
-               skb->csum_offset = (void *)check - data;
-               *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
-                                         datalen, proto, 0);
-       } else
-               inet_proto_csum_replace2(check, skb,
-                                        htons(oldlen), htons(datalen), true);
-}
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
-       .l3proto                = NFPROTO_IPV6,
-       .manip_pkt              = nf_nat_ipv6_manip_pkt,
-       .csum_update            = nf_nat_ipv6_csum_update,
-       .csum_recalc            = nf_nat_ipv6_csum_recalc,
-};
-
-int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
-                                   struct nf_conn *ct,
-                                   enum ip_conntrack_info ctinfo,
-                                   unsigned int hooknum,
-                                   unsigned int hdrlen)
-{
-       struct {
-               struct icmp6hdr icmp6;
-               struct ipv6hdr  ip6;
-       } *inside;
-       enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-       enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
-       struct nf_conntrack_tuple target;
-       unsigned long statusbit;
-
-       WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
-
-       if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
-               return 0;
-       if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
-               return 0;
-
-       inside = (void *)skb->data + hdrlen;
-       if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
-               if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
-                       return 0;
-               if (ct->status & IPS_NAT_MASK)
-                       return 0;
-       }
-
-       if (manip == NF_NAT_MANIP_SRC)
-               statusbit = IPS_SRC_NAT;
-       else
-               statusbit = IPS_DST_NAT;
-
-       /* Invert if this is reply direction */
-       if (dir == IP_CT_DIR_REPLY)
-               statusbit ^= IPS_NAT_MASK;
-
-       if (!(ct->status & statusbit))
-               return 1;
-
-       if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
-                                  &ct->tuplehash[!dir].tuple, !manip))
-               return 0;
-
-       if (skb->ip_summed != CHECKSUM_PARTIAL) {
-               struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-               inside = (void *)skb->data + hdrlen;
-               inside->icmp6.icmp6_cksum = 0;
-               inside->icmp6.icmp6_cksum =
-                       csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
-                                       skb->len - hdrlen, IPPROTO_ICMPV6,
-                                       skb_checksum(skb, hdrlen,
-                                                    skb->len - hdrlen, 0));
-       }
-
-       nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
-       target.dst.protonum = IPPROTO_ICMPV6;
-       if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
-               return 0;
-
-       return 1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
-
-static unsigned int
-nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
-              const struct nf_hook_state *state)
-{
-       struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-       __be16 frag_off;
-       int hdrlen;
-       u8 nexthdr;
-
-       ct = nf_ct_get(skb, &ctinfo);
-       /* Can't track?  It's not due to stress, or conntrack would
-        * have dropped it.  Hence it's the user's responsibilty to
-        * packet filter it out, or implement conntrack/NAT for that
-        * protocol. 8) --RR
-        */
-       if (!ct)
-               return NF_ACCEPT;
-
-       if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
-               nexthdr = ipv6_hdr(skb)->nexthdr;
-               hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
-                                         &nexthdr, &frag_off);
-
-               if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
-                       if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
-                                                            state->hook,
-                                                            hdrlen))
-                               return NF_DROP;
-                       else
-                               return NF_ACCEPT;
-               }
-       }
-
-       return nf_nat_inet_fn(priv, skb, state);
-}
-
-static unsigned int
-nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
-              const struct nf_hook_state *state)
-{
-       unsigned int ret;
-       struct in6_addr daddr = ipv6_hdr(skb)->daddr;
-
-       ret = nf_nat_ipv6_fn(priv, skb, state);
-       if (ret != NF_DROP && ret != NF_STOLEN &&
-           ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
-               skb_dst_drop(skb);
-
-       return ret;
-}
-
-static unsigned int
-nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
-               const struct nf_hook_state *state)
-{
-#ifdef CONFIG_XFRM
-       const struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-       int err;
-#endif
-       unsigned int ret;
-
-       ret = nf_nat_ipv6_fn(priv, skb, state);
-#ifdef CONFIG_XFRM
-       if (ret != NF_DROP && ret != NF_STOLEN &&
-           !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
-           (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-               enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-               if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
-                                     &ct->tuplehash[!dir].tuple.dst.u3) ||
-                   (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
-                    ct->tuplehash[dir].tuple.src.u.all !=
-                    ct->tuplehash[!dir].tuple.dst.u.all)) {
-                       err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
-                       if (err < 0)
-                               ret = NF_DROP_ERR(err);
-               }
-       }
-#endif
-       return ret;
-}
-
-static int nat_route_me_harder(struct net *net, struct sk_buff *skb)
-{
-#ifdef CONFIG_IPV6_MODULE
-       const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-       if (!v6_ops)
-               return -EHOSTUNREACH;
-
-       return v6_ops->route_me_harder(net, skb);
-#else
-       return ip6_route_me_harder(net, skb);
-#endif
-}
-
-static unsigned int
-nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
-                    const struct nf_hook_state *state)
-{
-       const struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-       unsigned int ret;
-       int err;
-
-       ret = nf_nat_ipv6_fn(priv, skb, state);
-       if (ret != NF_DROP && ret != NF_STOLEN &&
-           (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-               enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-               if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
-                                     &ct->tuplehash[!dir].tuple.src.u3)) {
-                       err = nat_route_me_harder(state->net, skb);
-                       if (err < 0)
-                               ret = NF_DROP_ERR(err);
-               }
-#ifdef CONFIG_XFRM
-               else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
-                        ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
-                        ct->tuplehash[dir].tuple.dst.u.all !=
-                        ct->tuplehash[!dir].tuple.src.u.all) {
-                       err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
-                       if (err < 0)
-                               ret = NF_DROP_ERR(err);
-               }
-#endif
-       }
-       return ret;
-}
-
-static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
-       /* Before packet filtering, change destination */
-       {
-               .hook           = nf_nat_ipv6_in,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_PRE_ROUTING,
-               .priority       = NF_IP6_PRI_NAT_DST,
-       },
-       /* After packet filtering, change source */
-       {
-               .hook           = nf_nat_ipv6_out,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_POST_ROUTING,
-               .priority       = NF_IP6_PRI_NAT_SRC,
-       },
-       /* Before packet filtering, change destination */
-       {
-               .hook           = nf_nat_ipv6_local_fn,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_LOCAL_OUT,
-               .priority       = NF_IP6_PRI_NAT_DST,
-       },
-       /* After packet filtering, change source */
-       {
-               .hook           = nf_nat_ipv6_fn,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_LOCAL_IN,
-               .priority       = NF_IP6_PRI_NAT_SRC,
-       },
-};
-
-int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
-{
-       return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn);
-
-void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
-{
-       nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
-
-static int __init nf_nat_l3proto_ipv6_init(void)
-{
-       return nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
-}
-
-static void __exit nf_nat_l3proto_ipv6_exit(void)
-{
-       nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
-}
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
-
-module_init(nf_nat_l3proto_ipv6_init);
-module_exit(nf_nat_l3proto_ipv6_exit);
index 5a753cec005bb7fdc8d9124db4dc82dd4fe861d5..5beb51d39dc29b5a0875ed05427bc9f759fa910d 100644 (file)
@@ -396,7 +396,13 @@ config NETFILTER_NETLINK_GLUE_CT
          the enqueued via NFNETLINK.
 
 config NF_NAT
-       tristate
+       tristate "Network Address Translation support"
+       depends on NF_CONNTRACK
+       default m if NETFILTER_ADVANCED=n
+       help
+         The NAT option allows masquerading, port forwarding and other
+         forms of full Network Address Port Translation. This can be
+         controlled by iptables, ip6tables or nft.
 
 config NF_NAT_NEEDED
        bool
index 0f39ae7a9f34ed3334a2a3598a189c33ddf358e2..0c548ff215b2b34dbaba6b2543e301be0ddea1ba 100644 (file)
@@ -1203,6 +1203,8 @@ static struct nf_nat_hook nat_hook = {
        .manip_pkt              = nf_nat_manip_pkt,
 };
 
+int nf_nat_l3proto_init(void);
+void nf_nat_l3proto_exit(void);
 static int __init nf_nat_init(void)
 {
        int ret, i;
@@ -1237,6 +1239,19 @@ static int __init nf_nat_init(void)
        WARN_ON(nf_nat_hook != NULL);
        RCU_INIT_POINTER(nf_nat_hook, &nat_hook);
 
+       ret = nf_nat_l3proto_init();
+       if (ret) {
+               nf_ct_extend_unregister(&nat_extend);
+               nf_ct_helper_expectfn_unregister(&follow_master_nat);
+               RCU_INIT_POINTER(nf_nat_hook, NULL);
+
+               synchronize_net();
+               kvfree(nf_nat_bysource);
+               unregister_pernet_subsys(&nat_net_ops);
+
+               return ret;
+       }
+
        return 0;
 }
 
@@ -1246,6 +1261,8 @@ static void __exit nf_nat_cleanup(void)
 
        nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
 
+       nf_nat_l3proto_exit();
+
        nf_ct_extend_unregister(&nat_extend);
        nf_ct_helper_expectfn_unregister(&follow_master_nat);
        RCU_INIT_POINTER(nf_nat_hook, NULL);
index f83bf9d8c9f5269885accc1c3fb59a59e8fb3712..9c4db18741efabc8c7e942e2f3c88b38650de133 100644 (file)
 #include <net/netfilter/nf_nat_l3proto.h>
 #include <net/netfilter/nf_nat_l4proto.h>
 
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/xfrm.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
+#if IS_ENABLED(CONFIG_IPV6)
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
+#endif
+
 static void
 __udp_manip_pkt(struct sk_buff *skb,
                const struct nf_nat_l3proto *l3proto,
@@ -341,3 +357,665 @@ bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb,
        return true;
 }
 EXPORT_SYMBOL_GPL(nf_nat_l4proto_manip_pkt);
+
+/* Rewrite one address of the IPv4 header located at @iphdroff.
+ *
+ * The transport header is handed to nf_nat_l4proto_manip_pkt() first; the
+ * source or destination address (selected by @maniptype) is then replaced
+ * by the one from @target and the IP header checksum is patched.
+ *
+ * Returns false if the skb cannot be made writable or the layer-4 rewrite
+ * fails, true otherwise.
+ */
+static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
+                                 unsigned int iphdroff,
+                                 const struct nf_conntrack_tuple *target,
+                                 enum nf_nat_manip_type maniptype)
+{
+       unsigned int l4off;
+       struct iphdr *ip4;
+       __be32 new_addr;
+       __be32 *addr;
+
+       if (!skb_make_writable(skb, iphdroff + sizeof(*ip4)))
+               return false;
+
+       ip4 = (void *)skb->data + iphdroff;
+       l4off = iphdroff + ip4->ihl * 4;
+
+       if (!nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff,
+                                     l4off, target, maniptype))
+               return false;
+
+       /* Look the header up again from skb->data after the l4 rewrite. */
+       ip4 = (void *)skb->data + iphdroff;
+
+       if (maniptype == NF_NAT_MANIP_SRC) {
+               addr = &ip4->saddr;
+               new_addr = target->src.u3.ip;
+       } else {
+               addr = &ip4->daddr;
+               new_addr = target->dst.u3.ip;
+       }
+
+       csum_replace4(&ip4->check, *addr, new_addr);
+       *addr = new_addr;
+       return true;
+}
+
+/* Replace the source or destination address (per @maniptype) of the IPv6
+ * header at @iphdroff with the matching address from @target.
+ *
+ * The layer-4 rewrite is attempted only when ipv6_skip_exthdr() returns a
+ * non-negative offset and everything but the low three bits of frag_off
+ * is zero.  Returns false if the skb cannot be made writable or that
+ * rewrite fails.
+ *
+ * Without CONFIG_IPV6 this is a stub that returns true.
+ */
+static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
+                                 unsigned int iphdroff,
+                                 const struct nf_conntrack_tuple *target,
+                                 enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       struct ipv6hdr *ip6;
+       __be16 frag;
+       u8 proto;
+       int l4off;
+
+       if (!skb_make_writable(skb, iphdroff + sizeof(*ip6)))
+               return false;
+
+       ip6 = (void *)skb->data + iphdroff;
+       proto = ip6->nexthdr;
+       l4off = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ip6),
+                                &proto, &frag);
+
+       /* On a negative offset only the address is rewritten below. */
+       if (l4off >= 0) {
+               if ((frag & htons(~0x7)) == 0 &&
+                   !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6,
+                                             iphdroff, l4off, target,
+                                             maniptype))
+                       return false;
+
+               /* must reload, offset might have changed */
+               ip6 = (void *)skb->data + iphdroff;
+       }
+
+       if (maniptype == NF_NAT_MANIP_SRC)
+               ip6->saddr = target->src.u3.in6;
+       else
+               ip6->daddr = target->dst.u3.in6;
+
+#endif
+       return true;
+}
+
+/* Patch the checksum at @check for an IPv4 address change: the address
+ * currently in the header at @iphdroff (source or destination, chosen by
+ * @maniptype) is replaced by the corresponding address from tuple @t.
+ */
+static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
+                                   unsigned int iphdroff, __sum16 *check,
+                                   const struct nf_conntrack_tuple *t,
+                                   enum nf_nat_manip_type maniptype)
+{
+       const struct iphdr *ip4 = (struct iphdr *)(skb->data + iphdroff);
+       const bool src = maniptype == NF_NAT_MANIP_SRC;
+       __be32 from = src ? ip4->saddr : ip4->daddr;
+       __be32 to = src ? t->src.u3.ip : t->dst.u3.ip;
+
+       inet_proto_csum_replace4(check, skb, from, to, true);
+}
+
+/* Patch the checksum at @check for an IPv6 address change: the address
+ * currently in the header at @iphdroff (source or destination, chosen by
+ * @maniptype) is replaced by the corresponding address from tuple @t.
+ *
+ * Empty stub when CONFIG_IPV6 is disabled.
+ */
+static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
+                                   unsigned int iphdroff, __sum16 *check,
+                                   const struct nf_conntrack_tuple *t,
+                                   enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       const struct ipv6hdr *ip6 = (struct ipv6hdr *)(skb->data + iphdroff);
+       const bool src = maniptype == NF_NAT_MANIP_SRC;
+       const struct in6_addr *from = src ? &ip6->saddr : &ip6->daddr;
+       const struct in6_addr *to = src ? &t->src.u3.in6 : &t->dst.u3.in6;
+
+       inet_proto_csum_replace16(check, skb, from->s6_addr32,
+                                 to->s6_addr32, true);
+#endif
+}
+
+/* Adjust the checksum at @check after the payload length changed from
+ * @oldlen to @datalen.
+ *
+ * If the skb is not yet CHECKSUM_PARTIAL it is switched to that mode:
+ * csum_start/csum_offset are set up and @check is seeded with the
+ * inverted pseudo-header sum.  Otherwise only the length is replaced in
+ * the existing checksum.
+ */
+static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
+                                   u8 proto, void *data, __sum16 *check,
+                                   int datalen, int oldlen)
+{
+       const struct iphdr *ip4;
+
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               inet_proto_csum_replace2(check, skb,
+                                        htons(oldlen), htons(datalen), true);
+               return;
+       }
+
+       ip4 = ip_hdr(skb);
+
+       skb->ip_summed = CHECKSUM_PARTIAL;
+       skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
+               ip_hdrlen(skb);
+       skb->csum_offset = (void *)check - data;
+       *check = ~csum_tcpudp_magic(ip4->saddr, ip4->daddr, datalen,
+                                   proto, 0);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/* Adjust the checksum at @check after the payload length changed from
+ * @oldlen to @datalen.
+ *
+ * If the skb is not yet CHECKSUM_PARTIAL it is switched to that mode,
+ * with csum_start derived from the position of @data inside the skb and
+ * @check seeded with the inverted IPv6 pseudo-header sum.  Otherwise only
+ * the length is replaced in the existing checksum.
+ */
+static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
+                                   u8 proto, void *data, __sum16 *check,
+                                   int datalen, int oldlen)
+{
+       const struct ipv6hdr *ip6;
+
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               inet_proto_csum_replace2(check, skb,
+                                        htons(oldlen), htons(datalen), true);
+               return;
+       }
+
+       ip6 = ipv6_hdr(skb);
+
+       skb->ip_summed = CHECKSUM_PARTIAL;
+       skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
+               (data - (void *)skb->data);
+       skb->csum_offset = (void *)check - data;
+       *check = ~csum_ipv6_magic(&ip6->saddr, &ip6->daddr,
+                                 datalen, proto, 0);
+}
+#endif
+
+/* IPv4 layer-3 NAT callbacks; registered by nf_nat_l3proto_init(). */
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
+       .l3proto        = NFPROTO_IPV4,
+       .csum_recalc    = nf_nat_ipv4_csum_recalc,
+       .csum_update    = nf_nat_ipv4_csum_update,
+       .manip_pkt      = nf_nat_ipv4_manip_pkt,
+};
+
+/* Translate an ICMP packet that is related to a NATed IPv4 connection.
+ *
+ * The IP header embedded behind the ICMP header is rewritten first (with
+ * the inverse manip type), the ICMP checksum is recomputed unless the skb
+ * is CHECKSUM_PARTIAL, and finally the outer header is rewritten.
+ *
+ * Returns 0 on failure (the hook in this file turns that into NF_DROP)
+ * and 1 otherwise, including when no translation applies to @ct in this
+ * direction.
+ */
+int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+                                 struct nf_conn *ct,
+                                 enum ip_conntrack_info ctinfo,
+                                 unsigned int hooknum)
+{
+       struct {
+               struct icmphdr  icmp;
+               struct iphdr    ip;
+       } *inner;
+       enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+       enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+       unsigned int outer_len = ip_hdrlen(skb);
+       struct nf_conntrack_tuple outer;
+       unsigned long natbit;
+
+       WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
+
+       if (!skb_make_writable(skb, outer_len + sizeof(*inner)) ||
+           nf_ip_checksum(skb, hooknum, outer_len, 0))
+               return 0;
+
+       /* Redirects pass only with all NAT-done bits set and no NAT bit set */
+       inner = (void *)skb->data + outer_len;
+       if (inner->icmp.type == ICMP_REDIRECT &&
+           ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK ||
+            (ct->status & IPS_NAT_MASK)))
+               return 0;
+
+       natbit = manip == NF_NAT_MANIP_SRC ? IPS_SRC_NAT : IPS_DST_NAT;
+
+       /* Invert if this is reply direction */
+       if (dir == IP_CT_DIR_REPLY)
+               natbit ^= IPS_NAT_MASK;
+
+       if (!(ct->status & natbit))
+               return 1;
+
+       if (!nf_nat_ipv4_manip_pkt(skb, outer_len + sizeof(inner->icmp),
+                                  &ct->tuplehash[!dir].tuple, !manip))
+               return 0;
+
+       if (skb->ip_summed != CHECKSUM_PARTIAL) {
+               /* manip_pkt may reallocate, so "inner" is looked up again */
+               inner = (void *)skb->data + outer_len;
+               inner->icmp.checksum = 0;
+               inner->icmp.checksum =
+                       csum_fold(skb_checksum(skb, outer_len,
+                                              skb->len - outer_len, 0));
+       }
+
+       /* Change outer to look like the reply to an incoming packet */
+       nf_ct_invert_tuple(&outer, &ct->tuplehash[!dir].tuple);
+       outer.dst.protonum = IPPROTO_ICMP;
+
+       return nf_nat_ipv4_manip_pkt(skb, 0, &outer, manip) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+/* Common body of the IPv4 NAT hooks.
+ *
+ * Packets without a conntrack entry are accepted untouched.  ICMP packets
+ * in RELATED state go through nf_nat_icmp_reply_translation() and are
+ * dropped if it fails.  Everything else is handed to nf_nat_inet_fn().
+ */
+static unsigned int
+nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
+              const struct nf_hook_state *state)
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+       bool related;
+
+       if (!ct)
+               return NF_ACCEPT;
+
+       related = ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY;
+       if (related && ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+               if (nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+                                                 state->hook))
+                       return NF_ACCEPT;
+
+               return NF_DROP;
+       }
+
+       return nf_nat_inet_fn(priv, skb, state);
+}
+
+/* IPv4 NAT hook used for NF_INET_PRE_ROUTING: if the packet was accepted
+ * and its destination address differs from the one seen on entry, the
+ * dst entry attached to the skb is dropped.
+ */
+static unsigned int
+nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
+              const struct nf_hook_state *state)
+{
+       __be32 orig_daddr = ip_hdr(skb)->daddr;
+       unsigned int verdict = nf_nat_ipv4_fn(priv, skb, state);
+
+       if (verdict != NF_ACCEPT)
+               return verdict;
+
+       if (orig_daddr != ip_hdr(skb)->daddr)
+               skb_dst_drop(skb);
+
+       return verdict;
+}
+
+/* IPv4 NAT hook used for NF_INET_POST_ROUTING.
+ *
+ * With CONFIG_XFRM, an accepted packet that is not flagged
+ * IPSKB_XFRM_TRANSFORMED gets nf_xfrm_me_harder() called when, for its
+ * direction, the tuple's source address differs from the opposite tuple's
+ * destination address, or (non-ICMP only) src.u.all differs from the
+ * opposite dst.u.all.  A negative result becomes an NF_DROP_ERR verdict.
+ */
+static unsigned int
+nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
+               const struct nf_hook_state *state)
+{
+       unsigned int ret = nf_nat_ipv4_fn(priv, skb, state);
+#ifdef CONFIG_XFRM
+       const struct nf_conntrack_tuple *cur, *inv;
+       enum ip_conntrack_info ctinfo;
+       enum ip_conntrack_dir dir;
+       const struct nf_conn *ct;
+       int err;
+
+       if (ret != NF_ACCEPT)
+               return ret;
+
+       if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
+               return ret;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct)
+               return ret;
+
+       dir = CTINFO2DIR(ctinfo);
+       cur = &ct->tuplehash[dir].tuple;
+       inv = &ct->tuplehash[!dir].tuple;
+
+       if (cur->src.u3.ip != inv->dst.u3.ip ||
+           (cur->dst.protonum != IPPROTO_ICMP &&
+            cur->src.u.all != inv->dst.u.all)) {
+               err = nf_xfrm_me_harder(state->net, skb, AF_INET);
+               if (err < 0)
+                       ret = NF_DROP_ERR(err);
+       }
+#endif
+       return ret;
+}
+
+/* IPv4 NAT hook used for NF_INET_LOCAL_OUT.
+ *
+ * For an accepted packet whose tuple destination address differs from
+ * the opposite tuple's source address, ip_route_me_harder() is called.
+ * Otherwise, with CONFIG_XFRM, nf_xfrm_me_harder() is called for
+ * untransformed non-ICMP packets whose dst.u.all differs from the
+ * opposite src.u.all.  A negative result becomes an NF_DROP_ERR verdict.
+ */
+static unsigned int
+nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
+                    const struct nf_hook_state *state)
+{
+       unsigned int ret = nf_nat_ipv4_fn(priv, skb, state);
+       const struct nf_conntrack_tuple *cur, *inv;
+       enum ip_conntrack_info ctinfo;
+       enum ip_conntrack_dir dir;
+       const struct nf_conn *ct;
+       int err;
+
+       if (ret != NF_ACCEPT)
+               return ret;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct)
+               return ret;
+
+       dir = CTINFO2DIR(ctinfo);
+       cur = &ct->tuplehash[dir].tuple;
+       inv = &ct->tuplehash[!dir].tuple;
+
+       if (cur->dst.u3.ip != inv->src.u3.ip) {
+               err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+               if (err < 0)
+                       ret = NF_DROP_ERR(err);
+       }
+#ifdef CONFIG_XFRM
+       else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+                cur->dst.protonum != IPPROTO_ICMP &&
+                cur->dst.u.all != inv->src.u.all) {
+               err = nf_xfrm_me_harder(state->net, skb, AF_INET);
+               if (err < 0)
+                       ret = NF_DROP_ERR(err);
+       }
+#endif
+       return ret;
+}
+
+/* IPv4 NAT hook table: one entry per hook point, passed together with its
+ * size to nf_nat_register_fn().
+ */
+static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
+       /* PRE_ROUTING: change destination, before packet filtering */
+       {
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_PRE_ROUTING,
+               .priority       = NF_IP_PRI_NAT_DST,
+               .hook           = nf_nat_ipv4_in,
+       },
+       /* POST_ROUTING: change source, after packet filtering */
+       {
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_POST_ROUTING,
+               .priority       = NF_IP_PRI_NAT_SRC,
+               .hook           = nf_nat_ipv4_out,
+       },
+       /* LOCAL_OUT: change destination, before packet filtering */
+       {
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_OUT,
+               .priority       = NF_IP_PRI_NAT_DST,
+               .hook           = nf_nat_ipv4_local_fn,
+       },
+       /* LOCAL_IN: change source, after packet filtering */
+       {
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_IN,
+               .priority       = NF_IP_PRI_NAT_SRC,
+               .hook           = nf_nat_ipv4_fn,
+       },
+};
+
+/* Forward @net and @ops to nf_nat_register_fn() together with the IPv4
+ * NAT hook table and its length; returns that function's result.
+ */
+int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+       const unsigned int n_hooks = ARRAY_SIZE(nf_nat_ipv4_ops);
+
+       return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, n_hooks);
+}
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_register_fn);
+
+/* Forward @net and @ops to nf_nat_unregister_fn() together with the
+ * length of the IPv4 NAT hook table.
+ */
+void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+       const unsigned int n_hooks = ARRAY_SIZE(nf_nat_ipv4_ops);
+
+       nf_nat_unregister_fn(net, ops, n_hooks);
+}
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
+
+/* Register the IPv4 l3proto and, when CONFIG_IPV6 is enabled, the IPv6
+ * one as well.  If the IPv6 registration fails the IPv4 registration is
+ * rolled back.  Returns 0 on success or the failing call's error code.
+ */
+int nf_nat_l3proto_init(void)
+{
+       int err;
+
+       err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
+#if IS_ENABLED(CONFIG_IPV6)
+       if (err == 0) {
+               err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
+               if (err)
+                       nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
+       }
+#endif
+       return err;
+}
+
+void nf_nat_l3proto_exit(void) /* unregisters what nf_nat_l3proto_init() registered */
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6); /* registered last */
+#endif
+       nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); /* registered first */
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/* IPv6 layer-3 NAT callbacks; registered by nf_nat_l3proto_init().
+ * The object is already forward-declared near the top of this file, so no
+ * second declaration is repeated in front of the definition.
+ */
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
+       .l3proto                = NFPROTO_IPV6,
+       .manip_pkt              = nf_nat_ipv6_manip_pkt,
+       .csum_update            = nf_nat_ipv6_csum_update,
+       .csum_recalc            = nf_nat_ipv6_csum_recalc,
+};
+
+/* Translate an ICMPv6 packet that is related to a NATed IPv6 connection.
+ * @hdrlen is the offset of the ICMPv6 header inside the skb.
+ *
+ * The IPv6 header embedded behind the ICMPv6 header is rewritten first
+ * (with the inverse manip type), the ICMPv6 checksum is recomputed unless
+ * the skb is CHECKSUM_PARTIAL, and finally the outer header is rewritten.
+ *
+ * Returns 0 on failure (the hook in this file turns that into NF_DROP)
+ * and 1 otherwise, including when no translation applies to @ct in this
+ * direction.
+ */
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+                                   struct nf_conn *ct,
+                                   enum ip_conntrack_info ctinfo,
+                                   unsigned int hooknum,
+                                   unsigned int hdrlen)
+{
+       struct {
+               struct icmp6hdr icmp6;
+               struct ipv6hdr  ip6;
+       } *inner;
+       enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+       enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+       struct nf_conntrack_tuple outer;
+       unsigned long natbit;
+
+       WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
+
+       if (!skb_make_writable(skb, hdrlen + sizeof(*inner)) ||
+           nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
+               return 0;
+
+       /* Redirects pass only with all NAT-done bits set and no NAT bit set */
+       inner = (void *)skb->data + hdrlen;
+       if (inner->icmp6.icmp6_type == NDISC_REDIRECT &&
+           ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK ||
+            (ct->status & IPS_NAT_MASK)))
+               return 0;
+
+       natbit = manip == NF_NAT_MANIP_SRC ? IPS_SRC_NAT : IPS_DST_NAT;
+
+       /* Invert if this is reply direction */
+       if (dir == IP_CT_DIR_REPLY)
+               natbit ^= IPS_NAT_MASK;
+
+       if (!(ct->status & natbit))
+               return 1;
+
+       if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inner->icmp6),
+                                  &ct->tuplehash[!dir].tuple, !manip))
+               return 0;
+
+       if (skb->ip_summed != CHECKSUM_PARTIAL) {
+               const struct ipv6hdr *ip6 = ipv6_hdr(skb);
+               unsigned int len = skb->len - hdrlen;
+               __wsum sum;
+
+               /* "inner" is looked up again after the rewrite above */
+               inner = (void *)skb->data + hdrlen;
+               inner->icmp6.icmp6_cksum = 0;
+               sum = skb_checksum(skb, hdrlen, len, 0);
+               inner->icmp6.icmp6_cksum =
+                       csum_ipv6_magic(&ip6->saddr, &ip6->daddr,
+                                       len, IPPROTO_ICMPV6, sum);
+       }
+
+       nf_ct_invert_tuple(&outer, &ct->tuplehash[!dir].tuple);
+       outer.dst.protonum = IPPROTO_ICMPV6;
+
+       return nf_nat_ipv6_manip_pkt(skb, 0, &outer, manip) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+
+/* Common body of the IPv6 NAT hooks.
+ *
+ * Packets without a conntrack entry are accepted untouched.  Packets in
+ * RELATED state whose upper-layer protocol (after skipping extension
+ * headers) is ICMPv6 go through nf_nat_icmpv6_reply_translation() and are
+ * dropped if it fails.  Everything else is handed to nf_nat_inet_fn().
+ */
+static unsigned int
+nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
+              const struct nf_hook_state *state)
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct;
+       __be16 frag_off;
+       u8 nexthdr;
+       int hdrlen;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       /* Can't track?  It's not due to stress, or conntrack would
+        * have dropped it.  Hence it's the user's responsibility to
+        * packet filter it out, or implement conntrack/NAT for that
+        * protocol. 8) --RR
+        */
+       if (!ct)
+               return NF_ACCEPT;
+
+       if (ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY)
+               return nf_nat_inet_fn(priv, skb, state);
+
+       nexthdr = ipv6_hdr(skb)->nexthdr;
+       hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+                                 &nexthdr, &frag_off);
+       if (hdrlen < 0 || nexthdr != IPPROTO_ICMPV6)
+               return nf_nat_inet_fn(priv, skb, state);
+
+       if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+                                            state->hook, hdrlen))
+               return NF_DROP;
+
+       return NF_ACCEPT;
+}
+
+/* IPv6 NAT hook used for NF_INET_PRE_ROUTING.
+ *
+ * Remembers the destination address, runs the common NAT code and, if the
+ * packet was accepted with a different destination address, drops the
+ * dst entry attached to the skb.
+ *
+ * The verdict is tested with "ret == NF_ACCEPT", the same way
+ * nf_nat_ipv4_in() does it, so the header is only looked at again for
+ * packets that continue through the stack.
+ *
+ * Returns the verdict of nf_nat_ipv6_fn() unchanged.
+ */
+static unsigned int
+nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
+              const struct nf_hook_state *state)
+{
+       unsigned int ret;
+       struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+       ret = nf_nat_ipv6_fn(priv, skb, state);
+       if (ret == NF_ACCEPT &&
+           ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+               skb_dst_drop(skb);
+
+       return ret;
+}
+
+/* IPv6 NAT hook used for NF_INET_POST_ROUTING.
+ *
+ * With CONFIG_XFRM, an accepted packet that is not flagged
+ * IP6SKB_XFRM_TRANSFORMED gets nf_xfrm_me_harder() called when, for its
+ * direction, the tuple's source address differs from the opposite tuple's
+ * destination address, or (non-ICMPv6 only) src.u.all differs from the
+ * opposite dst.u.all.  A negative result becomes an NF_DROP_ERR verdict.
+ */
+static unsigned int
+nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
+               const struct nf_hook_state *state)
+{
+       unsigned int ret = nf_nat_ipv6_fn(priv, skb, state);
+#ifdef CONFIG_XFRM
+       const struct nf_conntrack_tuple *cur, *inv;
+       enum ip_conntrack_info ctinfo;
+       enum ip_conntrack_dir dir;
+       const struct nf_conn *ct;
+       int err;
+
+       if (ret != NF_ACCEPT)
+               return ret;
+
+       if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
+               return ret;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct)
+               return ret;
+
+       dir = CTINFO2DIR(ctinfo);
+       cur = &ct->tuplehash[dir].tuple;
+       inv = &ct->tuplehash[!dir].tuple;
+
+       if (!nf_inet_addr_cmp(&cur->src.u3, &inv->dst.u3) ||
+           (cur->dst.protonum != IPPROTO_ICMPV6 &&
+            cur->src.u.all != inv->dst.u.all)) {
+               err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
+               if (err < 0)
+                       ret = NF_DROP_ERR(err);
+       }
+#endif
+
+       return ret;
+}
+
+/* Call the IPv6 route_me_harder implementation for @skb.
+ *
+ * With CONFIG_IPV6_MODULE the call goes through the ops returned by
+ * nf_get_ipv6_ops() and yields -EHOSTUNREACH when none are available;
+ * otherwise ip6_route_me_harder() is called directly.
+ */
+static int nat_route_me_harder(struct net *net, struct sk_buff *skb)
+{
+#ifdef CONFIG_IPV6_MODULE
+       const struct nf_ipv6_ops *ops = nf_get_ipv6_ops();
+
+       return ops ? ops->route_me_harder(net, skb) : -EHOSTUNREACH;
+#else
+       return ip6_route_me_harder(net, skb);
+#endif
+}
+
+/* IPv6 NAT hook used for NF_INET_LOCAL_OUT.
+ *
+ * For an accepted packet whose tuple destination address differs from
+ * the opposite tuple's source address, nat_route_me_harder() is called.
+ * Otherwise, with CONFIG_XFRM, nf_xfrm_me_harder() is called for
+ * untransformed non-ICMPv6 packets whose dst.u.all differs from the
+ * opposite src.u.all.  A negative result becomes an NF_DROP_ERR verdict.
+ */
+static unsigned int
+nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
+                    const struct nf_hook_state *state)
+{
+       unsigned int ret = nf_nat_ipv6_fn(priv, skb, state);
+       const struct nf_conntrack_tuple *cur, *inv;
+       enum ip_conntrack_info ctinfo;
+       enum ip_conntrack_dir dir;
+       const struct nf_conn *ct;
+       int err;
+
+       if (ret != NF_ACCEPT)
+               return ret;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct)
+               return ret;
+
+       dir = CTINFO2DIR(ctinfo);
+       cur = &ct->tuplehash[dir].tuple;
+       inv = &ct->tuplehash[!dir].tuple;
+
+       if (!nf_inet_addr_cmp(&cur->dst.u3, &inv->src.u3)) {
+               err = nat_route_me_harder(state->net, skb);
+               if (err < 0)
+                       ret = NF_DROP_ERR(err);
+       }
+#ifdef CONFIG_XFRM
+       else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+                cur->dst.protonum != IPPROTO_ICMPV6 &&
+                cur->dst.u.all != inv->src.u.all) {
+               err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
+               if (err < 0)
+                       ret = NF_DROP_ERR(err);
+       }
+#endif
+
+       return ret;
+}
+
+/* IPv6 NAT hook table: one entry per hook point, passed together with its
+ * size to nf_nat_register_fn().
+ */
+static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
+       /* PRE_ROUTING: change destination, before packet filtering */
+       {
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_PRE_ROUTING,
+               .priority       = NF_IP6_PRI_NAT_DST,
+               .hook           = nf_nat_ipv6_in,
+       },
+       /* POST_ROUTING: change source, after packet filtering */
+       {
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_POST_ROUTING,
+               .priority       = NF_IP6_PRI_NAT_SRC,
+               .hook           = nf_nat_ipv6_out,
+       },
+       /* LOCAL_OUT: change destination, before packet filtering */
+       {
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_LOCAL_OUT,
+               .priority       = NF_IP6_PRI_NAT_DST,
+               .hook           = nf_nat_ipv6_local_fn,
+       },
+       /* LOCAL_IN: change source, after packet filtering */
+       {
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_LOCAL_IN,
+               .priority       = NF_IP6_PRI_NAT_SRC,
+               .hook           = nf_nat_ipv6_fn,
+       },
+};
+
+/* Forward @net and @ops to nf_nat_register_fn() together with the IPv6
+ * NAT hook table and its length; returns that function's result.
+ */
+int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+       const unsigned int n_hooks = ARRAY_SIZE(nf_nat_ipv6_ops);
+
+       return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, n_hooks);
+}
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn);
+
+/* Forward @net and @ops to nf_nat_unregister_fn() together with the
+ * length of the IPv6 NAT hook table.
+ */
+void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+       const unsigned int n_hooks = ARRAY_SIZE(nf_nat_ipv6_ops);
+
+       nf_nat_unregister_fn(net, ops, n_hooks);
+}
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
+#endif /* CONFIG_IPV6 */
index 89da9512ec1e1f6b84511abe9f77a40f191101c8..ac1cc6e381706f333473980b6f0514a692b03e54 100644 (file)
@@ -8,8 +8,6 @@ config OPENVSWITCH
        depends on !NF_CONNTRACK || \
                   (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
                                     (!NF_NAT || NF_NAT) && \
-                                    (!NF_NAT_IPV4 || NF_NAT_IPV4) && \
-                                    (!NF_NAT_IPV6 || NF_NAT_IPV6) && \
                                     (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
        select LIBCRC32C
        select MPLS
index 35884f83626025c6f5be3d4a895f9d2ffa75d888..def4d28fcbc343ff00e573b24f1a59dddb817d66 100644 (file)
@@ -745,14 +745,14 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
        switch (ctinfo) {
        case IP_CT_RELATED:
        case IP_CT_RELATED_REPLY:
-               if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
+               if (IS_ENABLED(CONFIG_NF_NAT) &&
                    skb->protocol == htons(ETH_P_IP) &&
                    ip_hdr(skb)->protocol == IPPROTO_ICMP) {
                        if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
                                                           hooknum))
                                err = NF_DROP;
                        goto push;
-               } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
+               } else if (IS_ENABLED(CONFIG_IPV6) &&
                           skb->protocol == htons(ETH_P_IPV6)) {
                        __be16 frag_off;
                        u8 nexthdr = ipv6_hdr(skb)->nexthdr;
@@ -1673,7 +1673,7 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
        }
 
        if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
-               if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
+               if (IS_ENABLED(CONFIG_NF_NAT) &&
                    info->family == NFPROTO_IPV4) {
                        if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
                                            info->range.min_addr.ip) ||
@@ -1682,7 +1682,7 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
                             (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
                                              info->range.max_addr.ip))))
                                return false;
-               } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
+               } else if (IS_ENABLED(CONFIG_IPV6) &&
                           info->family == NFPROTO_IPV6) {
                        if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
                                             &info->range.min_addr.in6) ||
index 5821bdd98d20bfe4fcfb96ca9e97ef5cce758f0e..e9c860d0041614fbb3df603e53bc0e460a753a11 100644 (file)
@@ -17,8 +17,7 @@ CONFIG_VLAN_8021Q=y
 CONFIG_NETFILTER=y
 CONFIG_NETFILTER_ADVANCED=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_NAT_IPV6=m
-CONFIG_NF_NAT_IPV4=m
+CONFIG_NF_NAT=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP6_NF_NAT=m