From: Felix Fietkau <nbd@nbd.name> Date: Fri, 3 May 2024 16:19:56 +0000 (+0200) Subject: kernel: backport upstream GRO fixes X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=2f5398588a7cda749e7fcda1763d316ef75bb18f;p=openwrt%2Fstaging%2Fpepe2k.git kernel: backport upstream GRO fixes Signed-off-by: Felix Fietkau <nbd@nbd.name> --- diff --git a/target/linux/generic/backport-6.1/600-v6.9-01-net-gro-parse-ipv6-ext-headers-without-frag0-invalid.patch b/target/linux/generic/backport-6.1/600-v6.9-01-net-gro-parse-ipv6-ext-headers-without-frag0-invalid.patch new file mode 100644 index 0000000000..6dbec3c752 --- /dev/null +++ b/target/linux/generic/backport-6.1/600-v6.9-01-net-gro-parse-ipv6-ext-headers-without-frag0-invalid.patch @@ -0,0 +1,107 @@ +From: Richard Gobert <richardbgobert@gmail.com> +Date: Wed, 3 Jan 2024 15:44:21 +0100 +Subject: [PATCH] net: gro: parse ipv6 ext headers without frag0 invalidation + +The existing code always pulls the IPv6 header and sets the transport +offset initially. Then optionally again pulls any extension headers in +ipv6_gso_pull_exthdrs and sets the transport offset again on return from +that call. skb->data is set at the start of the first extension header +before calling ipv6_gso_pull_exthdrs, and must disable the frag0 +optimization because that function uses pskb_may_pull/pskb_pull instead of +skb_gro_ helpers. It sets the GRO offset to the TCP header with +skb_gro_pull and sets the transport header. Then returns skb->data to its +position before this block. + +This commit introduces a new helper function - ipv6_gro_pull_exthdrs - +which is used in ipv6_gro_receive to pull ipv6 ext headers instead of +ipv6_gso_pull_exthdrs. Thus, there is no modification of skb->data, all +operations use skb_gro_* helpers, and the frag0 fast path can be taken for +IPv6 packets with ext headers. + +Signed-off-by: Richard Gobert <richardbgobert@gmail.com> +Reviewed-by: Willem de Bruijn <willemb@google.com> +Reviewed-by: David Ahern <dsahern@kernel.org> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Link: https://lore.kernel.org/r/504130f6-b56c-4dcc-882c-97942c59f5b7@gmail.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +--- + +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -36,6 +36,40 @@ + INDIRECT_CALL_L4(cb, f2, f1, head, skb); \ + }) + ++static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto) ++{ ++ const struct net_offload *ops = NULL; ++ struct ipv6_opt_hdr *opth; ++ ++ for (;;) { ++ int len; ++ ++ ops = rcu_dereference(inet6_offloads[proto]); ++ ++ if (unlikely(!ops)) ++ break; ++ ++ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) ++ break; ++ ++ opth = skb_gro_header(skb, off + sizeof(*opth), off); ++ if (unlikely(!opth)) ++ break; ++ ++ len = ipv6_optlen(opth); ++ ++ opth = skb_gro_header(skb, off + len, off); ++ if (unlikely(!opth)) ++ break; ++ proto = opth->nexthdr; ++ ++ off += len; ++ } ++ ++ skb_gro_pull(skb, off - skb_network_offset(skb)); ++ return proto; ++} ++ + static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) + { + const struct net_offload *ops = NULL; +@@ -224,28 +258,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff * + goto out; + + skb_set_network_header(skb, off); +- skb_gro_pull(skb, sizeof(*iph)); +- skb_set_transport_header(skb, skb_gro_offset(skb)); + +- flush += ntohs(iph->payload_len) != skb_gro_len(skb); ++ flush += ntohs(iph->payload_len) != skb->len - hlen; + + proto = iph->nexthdr; + ops = rcu_dereference(inet6_offloads[proto]); + if (!ops || !ops->callbacks.gro_receive) { +- pskb_pull(skb, skb_gro_offset(skb)); +- skb_gro_frag0_invalidate(skb); +- proto = ipv6_gso_pull_exthdrs(skb, proto); +- skb_gro_pull(skb, -skb_transport_offset(skb)); +- skb_reset_transport_header(skb); +- __skb_push(skb, skb_gro_offset(skb)); ++ proto = ipv6_gro_pull_exthdrs(skb, hlen, proto); + + ops = rcu_dereference(inet6_offloads[proto]); + if (!ops || !ops->callbacks.gro_receive) + goto out; + +- iph = ipv6_hdr(skb); ++ iph = skb_gro_network_header(skb); ++ } else { ++ skb_gro_pull(skb, sizeof(*iph)); + } + ++ skb_set_transport_header(skb, skb_gro_offset(skb)); ++ + NAPI_GRO_CB(skb)->proto = proto; + + flush--; diff --git a/target/linux/generic/backport-6.1/600-v6.9-03-net-gro-add-flush-check-in-udp_gro_receive_segment.patch b/target/linux/generic/backport-6.1/600-v6.9-03-net-gro-add-flush-check-in-udp_gro_receive_segment.patch new file mode 100644 index 0000000000..55dac85df8 --- /dev/null +++ b/target/linux/generic/backport-6.1/600-v6.9-03-net-gro-add-flush-check-in-udp_gro_receive_segment.patch @@ -0,0 +1,48 @@ +From: Richard Gobert <richardbgobert@gmail.com> +Date: Tue, 30 Apr 2024 16:35:55 +0200 +Subject: [PATCH] net: gro: add flush check in udp_gro_receive_segment + +GRO-GSO path is supposed to be transparent and as such L3 flush checks are +relevant to all UDP flows merging in GRO. This patch uses the same logic +and code from tcp_gro_receive, terminating merge if flush is non zero. + +Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") +Signed-off-by: Richard Gobert <richardbgobert@gmail.com> +Reviewed-by: Willem de Bruijn <willemb@google.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +--- + +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -463,6 +463,7 @@ static struct sk_buff *udp_gro_receive_s + struct sk_buff *p; + unsigned int ulen; + int ret = 0; ++ int flush; + + /* requires non zero csum, for symmetry with GSO */ + if (!uh->check) { +@@ -496,13 +497,22 @@ static struct sk_buff *udp_gro_receive_s + return p; + } + ++ flush = NAPI_GRO_CB(p)->flush; ++ ++ if (NAPI_GRO_CB(p)->flush_id != 1 || ++ NAPI_GRO_CB(p)->count != 1 || ++ !NAPI_GRO_CB(p)->is_atomic) ++ flush |= NAPI_GRO_CB(p)->flush_id; ++ else ++ NAPI_GRO_CB(p)->is_atomic = false; ++ + /* Terminate the flow on len mismatch or if it grow "too much". + * Under small packet flood GRO count could elsewhere grow a lot + * leading to excessive truesize values. + * On len mismatch merge the first packet shorter than gso_size, + * otherwise complete the GRO packet. + */ +- if (ulen > ntohs(uh2->len)) { ++ if (ulen > ntohs(uh2->len) || flush) { + pp = p; + } else { + if (NAPI_GRO_CB(skb)->is_flist) { diff --git a/target/linux/generic/backport-6.6/600-v6.9-01-net-gro-parse-ipv6-ext-headers-without-frag0-invalid.patch b/target/linux/generic/backport-6.6/600-v6.9-01-net-gro-parse-ipv6-ext-headers-without-frag0-invalid.patch new file mode 100644 index 0000000000..d0fed02005 --- /dev/null +++ b/target/linux/generic/backport-6.6/600-v6.9-01-net-gro-parse-ipv6-ext-headers-without-frag0-invalid.patch @@ -0,0 +1,107 @@ +From: Richard Gobert <richardbgobert@gmail.com> +Date: Wed, 3 Jan 2024 15:44:21 +0100 +Subject: [PATCH] net: gro: parse ipv6 ext headers without frag0 invalidation + +The existing code always pulls the IPv6 header and sets the transport +offset initially. Then optionally again pulls any extension headers in +ipv6_gso_pull_exthdrs and sets the transport offset again on return from +that call. skb->data is set at the start of the first extension header +before calling ipv6_gso_pull_exthdrs, and must disable the frag0 +optimization because that function uses pskb_may_pull/pskb_pull instead of +skb_gro_ helpers. It sets the GRO offset to the TCP header with +skb_gro_pull and sets the transport header. Then returns skb->data to its +position before this block. + +This commit introduces a new helper function - ipv6_gro_pull_exthdrs - +which is used in ipv6_gro_receive to pull ipv6 ext headers instead of +ipv6_gso_pull_exthdrs. Thus, there is no modification of skb->data, all +operations use skb_gro_* helpers, and the frag0 fast path can be taken for +IPv6 packets with ext headers. + +Signed-off-by: Richard Gobert <richardbgobert@gmail.com> +Reviewed-by: Willem de Bruijn <willemb@google.com> +Reviewed-by: David Ahern <dsahern@kernel.org> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Link: https://lore.kernel.org/r/504130f6-b56c-4dcc-882c-97942c59f5b7@gmail.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +--- + +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -37,6 +37,40 @@ + INDIRECT_CALL_L4(cb, f2, f1, head, skb); \ + }) + ++static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto) ++{ ++ const struct net_offload *ops = NULL; ++ struct ipv6_opt_hdr *opth; ++ ++ for (;;) { ++ int len; ++ ++ ops = rcu_dereference(inet6_offloads[proto]); ++ ++ if (unlikely(!ops)) ++ break; ++ ++ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) ++ break; ++ ++ opth = skb_gro_header(skb, off + sizeof(*opth), off); ++ if (unlikely(!opth)) ++ break; ++ ++ len = ipv6_optlen(opth); ++ ++ opth = skb_gro_header(skb, off + len, off); ++ if (unlikely(!opth)) ++ break; ++ proto = opth->nexthdr; ++ ++ off += len; ++ } ++ ++ skb_gro_pull(skb, off - skb_network_offset(skb)); ++ return proto; ++} ++ + static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) + { + const struct net_offload *ops = NULL; +@@ -206,28 +240,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff * + goto out; + + skb_set_network_header(skb, off); +- skb_gro_pull(skb, sizeof(*iph)); +- skb_set_transport_header(skb, skb_gro_offset(skb)); + +- flush += ntohs(iph->payload_len) != skb_gro_len(skb); ++ flush += ntohs(iph->payload_len) != skb->len - hlen; + + proto = iph->nexthdr; + ops = rcu_dereference(inet6_offloads[proto]); + if (!ops || !ops->callbacks.gro_receive) { +- pskb_pull(skb, skb_gro_offset(skb)); +- skb_gro_frag0_invalidate(skb); +- proto = ipv6_gso_pull_exthdrs(skb, proto); +- skb_gro_pull(skb, -skb_transport_offset(skb)); +- skb_reset_transport_header(skb); +- __skb_push(skb, skb_gro_offset(skb)); ++ proto = ipv6_gro_pull_exthdrs(skb, hlen, proto); + + ops = rcu_dereference(inet6_offloads[proto]); + if (!ops || !ops->callbacks.gro_receive) + goto out; + +- iph = ipv6_hdr(skb); ++ iph = skb_gro_network_header(skb); ++ } else { ++ skb_gro_pull(skb, sizeof(*iph)); + } + ++ skb_set_transport_header(skb, skb_gro_offset(skb)); ++ + NAPI_GRO_CB(skb)->proto = proto; + + flush--; diff --git a/target/linux/generic/backport-6.6/600-v6.9-02-net-gro-fix-udp-bad-offset-in-socket-lookup-by-addin.patch b/target/linux/generic/backport-6.6/600-v6.9-02-net-gro-fix-udp-bad-offset-in-socket-lookup-by-addin.patch new file mode 100644 index 0000000000..c5d8497dee --- /dev/null +++ b/target/linux/generic/backport-6.6/600-v6.9-02-net-gro-fix-udp-bad-offset-in-socket-lookup-by-addin.patch @@ -0,0 +1,178 @@ +From: Richard Gobert <richardbgobert@gmail.com> +Date: Tue, 30 Apr 2024 16:35:54 +0200 +Subject: [PATCH] net: gro: fix udp bad offset in socket lookup by adding + {inner_}network_offset to napi_gro_cb + +Commits a602456 ("udp: Add GRO functions to UDP socket") and 57c67ff ("udp: +additional GRO support") introduce incorrect usage of {ip,ipv6}_hdr in the +complete phase of gro. The functions always return skb->network_header, +which in the case of encapsulated packets at the gro complete phase, is +always set to the innermost L3 of the packet. That means that calling +{ip,ipv6}_hdr for skbs which completed the GRO receive phase (both in +gro_list and *_gro_complete) when parsing an encapsulated packet's _outer_ +L3/L4 may return an unexpected value. + +This incorrect usage leads to a bug in GRO's UDP socket lookup. +udp{4,6}_lib_lookup_skb functions use ip_hdr/ipv6_hdr respectively. These +*_hdr functions return network_header which will point to the innermost L3, +resulting in the wrong offset being used in __udp{4,6}_lib_lookup with +encapsulated packets. + +This patch adds network_offset and inner_network_offset to napi_gro_cb, and +makes sure both are set correctly. + +To fix the issue, network_offsets union is used inside napi_gro_cb, in +which both the outer and the inner network offsets are saved. + +Reproduction example: + +Endpoint configuration example (fou + local address bind) + + # ip fou add port 6666 ipproto 4 + # ip link add name tun1 type ipip remote 2.2.2.1 local 2.2.2.2 encap fou encap-dport 5555 encap-sport 6666 mode ipip + # ip link set tun1 up + # ip a add 1.1.1.2/24 dev tun1 + +Netperf TCP_STREAM result on net-next before patch is applied: + +net-next main, GRO enabled: + $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5 + Recv Send Send + Socket Socket Message Elapsed + Size Size Size Time Throughput + bytes bytes bytes secs. 10^6bits/sec + + 131072 16384 16384 5.28 2.37 + +net-next main, GRO disabled: + $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5 + Recv Send Send + Socket Socket Message Elapsed + Size Size Size Time Throughput + bytes bytes bytes secs. 10^6bits/sec + + 131072 16384 16384 5.01 2745.06 + +patch applied, GRO enabled: + $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5 + Recv Send Send + Socket Socket Message Elapsed + Size Size Size Time Throughput + bytes bytes bytes secs. 10^6bits/sec + + 131072 16384 16384 5.01 2877.38 + +Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket") +Signed-off-by: Richard Gobert <richardbgobert@gmail.com> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Reviewed-by: Willem de Bruijn <willemb@google.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +--- + +--- a/include/net/gro.h ++++ b/include/net/gro.h +@@ -86,6 +86,15 @@ struct napi_gro_cb { + + /* used to support CHECKSUM_COMPLETE for tunneling protocols */ + __wsum csum; ++ ++ /* L3 offsets */ ++ union { ++ struct { ++ u16 network_offset; ++ u16 inner_network_offset; ++ }; ++ u16 network_offsets[2]; ++ }; + }; + + #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) +--- a/net/8021q/vlan_core.c ++++ b/net/8021q/vlan_core.c +@@ -478,6 +478,8 @@ static struct sk_buff *vlan_gro_receive( + if (unlikely(!vhdr)) + goto out; + ++ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen; ++ + type = vhdr->h_vlan_encapsulated_proto; + + ptype = gro_find_receive_by_type(type); +--- a/net/core/gro.c ++++ b/net/core/gro.c +@@ -373,6 +373,7 @@ static inline void skb_gro_reset_offset( + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + ++ NAPI_GRO_CB(skb)->network_offset = 0; + NAPI_GRO_CB(skb)->data_offset = 0; + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1571,6 +1571,7 @@ struct sk_buff *inet_gro_receive(struct + /* The above will be needed by the transport layer if there is one + * immediately following this IP hdr. + */ ++ NAPI_GRO_CB(skb)->inner_network_offset = off; + + /* Note : No need to call skb_gro_postpull_rcsum() here, + * as we already checked checksum over ipv4 header was 0 +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -534,7 +534,8 @@ static inline struct sock *__udp4_lib_lo + struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, + __be16 sport, __be16 dport) + { +- const struct iphdr *iph = ip_hdr(skb); ++ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; ++ const struct iphdr *iph = (struct iphdr *)(skb->data + offset); + struct net *net = dev_net(skb->dev); + int iif, sdif; + +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -718,7 +718,8 @@ EXPORT_SYMBOL(udp_gro_complete); + + INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) + { +- const struct iphdr *iph = ip_hdr(skb); ++ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; ++ const struct iphdr *iph = (struct iphdr *)(skb->data + offset); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + /* do fraglist only if there is no outer UDP encap (or we already processed it) */ +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -240,6 +240,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff * + goto out; + + skb_set_network_header(skb, off); ++ NAPI_GRO_CB(skb)->inner_network_offset = off; + + flush += ntohs(iph->payload_len) != skb->len - hlen; + +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -275,7 +275,8 @@ static struct sock *__udp6_lib_lookup_sk + struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, + __be16 sport, __be16 dport) + { +- const struct ipv6hdr *iph = ipv6_hdr(skb); ++ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; ++ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); + struct net *net = dev_net(skb->dev); + int iif, sdif; + +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -164,7 +164,8 @@ flush: + + INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) + { +- const struct ipv6hdr *ipv6h = ipv6_hdr(skb); ++ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; ++ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + /* do fraglist only if there is no outer UDP encap (or we already processed it) */ diff --git a/target/linux/generic/backport-6.6/600-v6.9-03-net-gro-add-flush-check-in-udp_gro_receive_segment.patch b/target/linux/generic/backport-6.6/600-v6.9-03-net-gro-add-flush-check-in-udp_gro_receive_segment.patch new file mode 100644 index 0000000000..72b76dd938 --- /dev/null +++ b/target/linux/generic/backport-6.6/600-v6.9-03-net-gro-add-flush-check-in-udp_gro_receive_segment.patch @@ -0,0 +1,48 @@ +From: Richard Gobert <richardbgobert@gmail.com> +Date: Tue, 30 Apr 2024 16:35:55 +0200 +Subject: [PATCH] net: gro: add flush check in udp_gro_receive_segment + +GRO-GSO path is supposed to be transparent and as such L3 flush checks are +relevant to all UDP flows merging in GRO. This patch uses the same logic +and code from tcp_gro_receive, terminating merge if flush is non zero. + +Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") +Signed-off-by: Richard Gobert <richardbgobert@gmail.com> +Reviewed-by: Willem de Bruijn <willemb@google.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +--- + +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -471,6 +471,7 @@ static struct sk_buff *udp_gro_receive_s + struct sk_buff *p; + unsigned int ulen; + int ret = 0; ++ int flush; + + /* requires non zero csum, for symmetry with GSO */ + if (!uh->check) { +@@ -504,13 +505,22 @@ static struct sk_buff *udp_gro_receive_s + return p; + } + ++ flush = NAPI_GRO_CB(p)->flush; ++ ++ if (NAPI_GRO_CB(p)->flush_id != 1 || ++ NAPI_GRO_CB(p)->count != 1 || ++ !NAPI_GRO_CB(p)->is_atomic) ++ flush |= NAPI_GRO_CB(p)->flush_id; ++ else ++ NAPI_GRO_CB(p)->is_atomic = false; ++ + /* Terminate the flow on len mismatch or if it grow "too much". + * Under small packet flood GRO count could elsewhere grow a lot + * leading to excessive truesize values. + * On len mismatch merge the first packet shorter than gso_size, + * otherwise complete the GRO packet. + */ +- if (ulen > ntohs(uh2->len)) { ++ if (ulen > ntohs(uh2->len) || flush) { + pp = p; + } else { + if (NAPI_GRO_CB(skb)->is_flist) { diff --git a/target/linux/generic/pending-6.6/680-net-add-TCP-fraglist-GRO-support.patch b/target/linux/generic/pending-6.6/680-net-add-TCP-fraglist-GRO-support.patch index 6205901707..7af7d8830c 100644 --- a/target/linux/generic/pending-6.6/680-net-add-TCP-fraglist-GRO-support.patch +++ b/target/linux/generic/pending-6.6/680-net-add-TCP-fraglist-GRO-support.patch @@ -21,7 +21,7 @@ Signe-off-by: Felix Fietkau <nbd@nbd.name> --- a/include/net/gro.h +++ b/include/net/gro.h -@@ -430,6 +430,7 @@ static inline __wsum ip6_gro_compute_pse +@@ -439,6 +439,7 @@ static inline __wsum ip6_gro_compute_pse } int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);