tunneling: Add generic Tunnel segmentation.
author Pravin B Shelar <pshelar@nicira.com>
Thu, 7 Mar 2013 13:21:51 +0000 (13:21 +0000)
committer David S. Miller <davem@davemloft.net>
Sat, 9 Mar 2013 21:09:17 +0000 (16:09 -0500)
Add generic tunnel offload support for IPv4 UDP-based tunnels.
A new GSO type, SKB_GSO_UDP_TUNNEL, is added to request this offload
for an skb.
The netdev feature NETIF_F_UDP_TUNNEL is added for hardware-offloaded
UDP tunnel support. Currently no device supports this feature, so
software offload is used.

This can be used by tunneling protocols like VXLAN.

CC: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/netdev_features.h
include/linux/skbuff.h
net/core/ethtool.c
net/ipv4/af_inet.c
net/ipv4/tcp.c
net/ipv4/udp.c
net/ipv6/ip6_offload.c
net/ipv6/udp_offload.c

index 3dd39340430e6b0ff369d8f10642d1e9f2301f26..f5e797c0c2a4c916e9e82edeb5a2c1e1ffae3f7f 100644 (file)
@@ -42,9 +42,9 @@ enum {
        NETIF_F_TSO6_BIT,               /* ... TCPv6 segmentation */
        NETIF_F_FSO_BIT,                /* ... FCoE segmentation */
        NETIF_F_GSO_GRE_BIT,            /* ... GRE with TSO */
-       /**/NETIF_F_GSO_LAST,           /* [can't be last bit, see GSO_MASK] */
-       NETIF_F_GSO_RESERVED2           /* ... free (fill GSO_MASK to 8 bits) */
-               = NETIF_F_GSO_LAST,
+       NETIF_F_GSO_UDP_TUNNEL_BIT,     /* ... UDP TUNNEL with TSO */
+       /**/NETIF_F_GSO_LAST =          /* last bit, see GSO_MASK */
+               NETIF_F_GSO_UDP_TUNNEL_BIT,
 
        NETIF_F_FCOE_CRC_BIT,           /* FCoE CRC32 */
        NETIF_F_SCTP_CSUM_BIT,          /* SCTP checksum offload */
@@ -103,6 +103,7 @@ enum {
 #define NETIF_F_RXFCS          __NETIF_F(RXFCS)
 #define NETIF_F_RXALL          __NETIF_F(RXALL)
 #define NETIF_F_GRE_GSO                __NETIF_F(GSO_GRE)
+#define NETIF_F_UDP_TUNNEL     __NETIF_F(GSO_UDP_TUNNEL)      /* bit: NETIF_F_GSO_UDP_TUNNEL_BIT */
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
index d7f96ff68f77db6b27579e59d36654ff84e92fa6..eb2106fe3bb4c7bbeb7cc874f3d1d0a63a7f81b7 100644 (file)
@@ -316,6 +316,8 @@ enum {
        SKB_GSO_FCOE = 1 << 5,
 
        SKB_GSO_GRE = 1 << 6,
+
+       SKB_GSO_UDP_TUNNEL = 1 << 7,
 };
 
 #if BITS_PER_LONG > 32
index 3e9b2c3e30f063cb3e59545f2af31176ea7b0151..adc1351e6873aa826e71aa4ff7e96a9df23d74b4 100644 (file)
@@ -78,6 +78,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
        [NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
        [NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
        [NETIF_F_GSO_GRE_BIT] =          "tx-gre-segmentation",
+       [NETIF_F_GSO_UDP_TUNNEL_BIT] =   "tx-udp_tnl-segmentation",
 
        [NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
        [NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
index dc3f677360a5bc0975c1e079cf872ef422419626..9e5882caf8a7fdf619613467f3fdf0cfab431a56 100644 (file)
@@ -1283,6 +1283,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
        int ihl;
        int id;
        unsigned int offset = 0;
+       bool tunnel;
 
        if (unlikely(skb_shinfo(skb)->gso_type &
                     ~(SKB_GSO_TCPV4 |
@@ -1290,6 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
                       SKB_GSO_DODGY |
                       SKB_GSO_TCP_ECN |
                       SKB_GSO_GRE |
+                      SKB_GSO_UDP_TUNNEL |
                       0)))
                goto out;
 
@@ -1304,6 +1306,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
        if (unlikely(!pskb_may_pull(skb, ihl)))
                goto out;
 
+       tunnel = !!skb->encapsulation;
+
        __skb_pull(skb, ihl);
        skb_reset_transport_header(skb);
        iph = ip_hdr(skb);
@@ -1323,7 +1327,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
        skb = segs;
        do {
                iph = ip_hdr(skb);
-               if (proto == IPPROTO_UDP) {
+               if (!tunnel && proto == IPPROTO_UDP) {
                        iph->id = htons(id);
                        iph->frag_off = htons(offset >> 3);
                        if (skb->next != NULL)
index 47e854fcae24dd8c108e93a2b5a22801a68d7232..8d14573ade772f62378da2f90393806d4069ae04 100644 (file)
@@ -3044,6 +3044,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                               SKB_GSO_TCP_ECN |
                               SKB_GSO_TCPV6 |
                               SKB_GSO_GRE |
+                              SKB_GSO_UDP_TUNNEL |
                               0) ||
                             !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
                        goto out;
index 265c42cf963c30cce55016bbf4ef0a13f57e6ec5..41760e043bf5ad3d602102bcc6c85f658b7a754b 100644 (file)
@@ -2272,31 +2272,88 @@ void __init udp_init(void)
 
 int udp4_ufo_send_check(struct sk_buff *skb)
 {
-       const struct iphdr *iph;
-       struct udphdr *uh;
-
-       if (!pskb_may_pull(skb, sizeof(*uh)))
+       if (!pskb_may_pull(skb, sizeof(struct udphdr))) /* need a full UDP header in the linear area */
                return -EINVAL;
 
-       iph = ip_hdr(skb);
-       uh = udp_hdr(skb);
+       if (likely(!skb->encapsulation)) { /* encapsulated skbs skip the checksum setup below and just return 0 */
+               const struct iphdr *iph;
+               struct udphdr *uh;
 
-       uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
-                                      IPPROTO_UDP, 0);
-       skb->csum_start = skb_transport_header(skb) - skb->head;
-       skb->csum_offset = offsetof(struct udphdr, check);
-       skb->ip_summed = CHECKSUM_PARTIAL;
+               iph = ip_hdr(skb);
+               uh = udp_hdr(skb);
+
+               uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+                               IPPROTO_UDP, 0); /* seed check with the complemented pseudo-header sum */
+               skb->csum_start = skb_transport_header(skb) - skb->head; /* checksum starts at the transport header */
+               skb->csum_offset = offsetof(struct udphdr, check); /* result is stored in uh->check */
+               skb->ip_summed = CHECKSUM_PARTIAL;
+       }
        return 0;
 }
 
+static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+               netdev_features_t features) /* NOTE(review): 'features' is not referenced in this body */
+{ /* Segments the inner packet of a UDP-tunnelled skb, then re-exposes and fixes up the outer headers on each segment. */
+       struct sk_buff *segs = ERR_PTR(-EINVAL); /* returned as-is if the pull below fails */
+       int mac_len = skb->mac_len; /* saved outer mac_len; restored on every segment */
+       int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); /* bytes from transport header to inner MAC header */
+       int outer_hlen;
+       netdev_features_t enc_features;
+
+       if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
+               goto out;
+
+       skb->encapsulation = 0; /* NOTE(review): not set back on the error path below */
+       __skb_pull(skb, tnl_hlen); /* presumably leaves skb->data at the inner MAC header - confirm against caller */
+       skb_reset_mac_header(skb);
+       skb_set_network_header(skb, skb_inner_network_offset(skb));
+       skb->mac_len = skb_inner_network_offset(skb);
+
+       /* segment inner packet. */
+       enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); /* keep only features also set in hw_enc_features */
+       segs = skb_mac_gso_segment(skb, enc_features);
+       if (!segs || IS_ERR(segs)) /* NULL or error pointer: hand it back unchanged */
+               goto out;
+
+       outer_hlen = skb_tnl_header_len(skb); /* presumably the length of all headers in front of the inner packet - TODO confirm */
+       skb = segs; /* from here on 'skb' walks the segment list */
+       do {
+               struct udphdr *uh;
+               int udp_offset = outer_hlen - tnl_hlen; /* offset of the outer UDP header once outer_hlen is pushed */
+
+               skb->mac_len = mac_len;
+
+               skb_push(skb, outer_hlen); /* re-expose the outer headers on this segment */
+               skb_reset_mac_header(skb);
+               skb_set_network_header(skb, mac_len);
+               skb_set_transport_header(skb, udp_offset);
+               uh = udp_hdr(skb);
+               uh->len = htons(skb->len - udp_offset); /* UDP length covers UDP header through end of segment */
+
+               /* csum segment if tunnel sets skb with csum. */
+               if (unlikely(uh->check)) {
+                       struct iphdr *iph = ip_hdr(skb);
+
+                       uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+                                                      skb->len - udp_offset,
+                                                      IPPROTO_UDP, 0); /* pseudo-header seed; summed by skb_checksum() below */
+                       uh->check = csum_fold(skb_checksum(skb, udp_offset,
+                                                          skb->len - udp_offset, 0)); /* sum runs over the seeded header before this store */
+                       if (uh->check == 0)
+                               uh->check = CSUM_MANGLED_0; /* avoid storing a zero checksum */
+
+               }
+               skb->ip_summed = CHECKSUM_NONE;
+       } while ((skb = skb->next));
+out:
+       return segs;
+}
+
 struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
        netdev_features_t features)
 {
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        unsigned int mss;
-       int offset;
-       __wsum csum;
-
        mss = skb_shinfo(skb)->gso_size;
        if (unlikely(skb->len <= mss))
                goto out;
@@ -2306,6 +2363,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
                int type = skb_shinfo(skb)->gso_type;
 
                if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+                                     SKB_GSO_UDP_TUNNEL |
                                      SKB_GSO_GRE) ||
                             !(type & (SKB_GSO_UDP))))
                        goto out;
@@ -2316,20 +2374,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
                goto out;
        }
 
-       /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
-        * do checksum of UDP packets sent as multiple IP fragments.
-        */
-       offset = skb_checksum_start_offset(skb);
-       csum = skb_checksum(skb, offset, skb->len - offset, 0);
-       offset += skb->csum_offset;
-       *(__sum16 *)(skb->data + offset) = csum_fold(csum);
-       skb->ip_summed = CHECKSUM_NONE;
-
        /* Fragment the skb. IP headers of the fragments are updated in
         * inet_gso_segment()
         */
-       segs = skb_segment(skb, features);
+       if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+               segs = skb_udp_tunnel_segment(skb, features);
+       else {
+               int offset;
+               __wsum csum;
+
+               /* Do software UFO. Complete and fill in the UDP checksum as
+                * HW cannot do checksum of UDP packets sent as multiple
+                * IP fragments.
+                */
+               offset = skb_checksum_start_offset(skb);
+               csum = skb_checksum(skb, offset, skb->len - offset, 0);
+               offset += skb->csum_offset;
+               *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+               skb->ip_summed = CHECKSUM_NONE;
+
+               segs = skb_segment(skb, features);
+       }
 out:
        return segs;
 }
-
index 7a0d25a5479c0a906d38cee3d6caa3b4254fbf43..71b766ee821d64fd10e99482b962ca6cea07cdad 100644 (file)
@@ -97,6 +97,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
                       SKB_GSO_DODGY |
                       SKB_GSO_TCP_ECN |
                       SKB_GSO_GRE |
+                      SKB_GSO_UDP_TUNNEL |
                       SKB_GSO_TCPV6 |
                       0)))
                goto out;
index cf05cf073c517d2e0fae4c967acffffa77befcbc..3bb3a891a42416b23ddb278d3fd7c051d25cfcf7 100644 (file)
@@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
        const struct ipv6hdr *ipv6h;
        struct udphdr *uh;
 
+       /* UDP Tunnel offload on ipv6 is not yet supported. */
+       if (skb->encapsulation)
+               return -EINVAL;
+
        if (!pskb_may_pull(skb, sizeof(*uh)))
                return -EINVAL;
 
@@ -56,7 +60,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
                /* Packet is from an untrusted source, reset gso_segs. */
                int type = skb_shinfo(skb)->gso_type;
 
-               if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+               if (unlikely(type & ~(SKB_GSO_UDP |
+                                     SKB_GSO_DODGY |
+                                     SKB_GSO_UDP_TUNNEL |
                                      SKB_GSO_GRE) ||
                             !(type & (SKB_GSO_UDP))))
                        goto out;