From ad4232e3e986d89ac49f01bb2cec44d1e66d6969 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Tue, 23 Jan 2018 20:52:45 +0100
Subject: [PATCH] ar71xx: add unaligned access hacks for VXLAN

Gives a ~5% performance gain.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
---
 .../910-unaligned_access_hacks.patch          | 92 +++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/target/linux/ar71xx/patches-4.9/910-unaligned_access_hacks.patch b/target/linux/ar71xx/patches-4.9/910-unaligned_access_hacks.patch
index 92f0a67658..f26af24d14 100644
--- a/target/linux/ar71xx/patches-4.9/910-unaligned_access_hacks.patch
+++ b/target/linux/ar71xx/patches-4.9/910-unaligned_access_hacks.patch
@@ -886,3 +886,95 @@
  	};
  	if (skb->ip_summed != CHECKSUM_PARTIAL) {
  		*sum = csum_fold(csum_partial(diff, sizeof(diff),
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -1811,15 +1811,15 @@ static int vxlan_build_skb(struct sk_buf
+ 		goto out_free;
+ 
+ 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
+-	vxh->vx_flags = VXLAN_HF_VNI;
+-	vxh->vx_vni = vxlan_vni_field(vni);
++	net_hdr_word(&vxh->vx_flags) = VXLAN_HF_VNI;
++	net_hdr_word(&vxh->vx_vni) = vxlan_vni_field(vni);
+ 
+ 	if (type & SKB_GSO_TUNNEL_REMCSUM) {
+ 		unsigned int start;
+ 
+ 		start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr);
+-		vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset);
+-		vxh->vx_flags |= VXLAN_HF_RCO;
++		net_hdr_word(&vxh->vx_vni) |= vxlan_compute_rco(start, skb->csum_offset);
++		net_hdr_word(&vxh->vx_flags) |= VXLAN_HF_RCO;
+ 
+ 		if (!skb_is_gso(skb)) {
+ 			skb->ip_summed = CHECKSUM_NONE;
+--- a/include/linux/etherdevice.h
++++ b/include/linux/etherdevice.h
+@@ -435,7 +435,7 @@ static inline bool is_etherdev_addr(cons
+  * @b: Pointer to Ethernet header
+  *
+  * Compare two Ethernet headers, returns 0 if equal.
+- * This assumes that the network header (i.e., IP header) is 4-byte
++ * This assumes that the network header (i.e., IP header) is 2-byte
+  * aligned OR the platform can handle unaligned access.  This is the
+  * case for all packets coming into netif_receive_skb or similar
+  * entry points.
+@@ -458,11 +458,12 @@ static inline unsigned long compare_ethe
+ 	fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6);
+ 	return fold;
+ #else
+-	u32 *a32 = (u32 *)((u8 *)a + 2);
+-	u32 *b32 = (u32 *)((u8 *)b + 2);
++	const u16 *a16 = a;
++	const u16 *b16 = b;
+ 
+-	return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) |
+-	       (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]);
++	return (a16[0] ^ b16[0]) | (a16[1] ^ b16[1]) | (a16[2] ^ b16[2]) |
++	       (a16[3] ^ b16[3]) | (a16[4] ^ b16[4]) | (a16[5] ^ b16[5]) |
++	       (a16[6] ^ b16[6]);
+ #endif
+ }
+ 
+--- a/net/ipv4/tcp_offload.c
++++ b/net/ipv4/tcp_offload.c
+@@ -215,7 +215,7 @@ struct sk_buff **tcp_gro_receive(struct
+ 
+ 		th2 = tcp_hdr(p);
+ 
+-		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
++		if (net_hdr_word(&th->source) ^ net_hdr_word(&th2->source)) {
+ 			NAPI_GRO_CB(p)->same_flow = 0;
+ 			continue;
+ 		}
+@@ -233,8 +233,8 @@ found:
+ 		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
+ 	flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
+ 	for (i = sizeof(*th); i < thlen; i += 4)
+-		flush |= *(u32 *)((u8 *)th + i) ^
+-			 *(u32 *)((u8 *)th2 + i);
++		flush |= net_hdr_word((u8 *)th + i) ^
++			 net_hdr_word((u8 *)th2 + i);
+ 
+ 	/* When we receive our second frame we can made a decision on if we
+ 	 * continue this flow as an atomic flow with a fixed ID or if we use
+--- a/net/ipv6/netfilter/ip6table_mangle.c
++++ b/net/ipv6/netfilter/ip6table_mangle.c
+@@ -58,7 +58,7 @@ ip6t_mangle_out(struct sk_buff *skb, con
+ 	hop_limit = ipv6_hdr(skb)->hop_limit;
+ 
+ 	/* flowlabel and prio (includes version, which shouldn't change either */
+-	flowlabel = *((u_int32_t *)ipv6_hdr(skb));
++	flowlabel = net_hdr_word(ipv6_hdr(skb));
+ 
+ 	ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
+ 
+@@ -67,7 +67,7 @@ ip6t_mangle_out(struct sk_buff *skb, con
+ 	     !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
+ 	     skb->mark != mark ||
+ 	     ipv6_hdr(skb)->hop_limit != hop_limit ||
+-	     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
++	     flowlabel != net_hdr_word(ipv6_hdr(skb)))) {
+ 		err = ip6_route_me_harder(state->net, skb);
+ 		if (err < 0)
+ 			ret = NF_DROP_ERR(err);
-- 
2.30.2