From: Eric Dumazet Date: Tue, 7 Aug 2012 02:19:56 +0000 (+0000) Subject: net: output path optimizations X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=425f09ab7d1c9da6ca4137dd639cb6fe3f8a88f3;p=openwrt%2Fstaging%2Fblogic.git net: output path optimizations 1) Avoid dirtying neighbour's confirmed field. TCP workloads hits this cache line for each incoming ACK. Lets write n->confirmed only if there is a jiffie change. 2) Optimize neigh_hh_output() for the common Ethernet case, were hh_len is less than 16 bytes. Replace the memcpy() call by two inlined 64bit load/stores on x86_64. Bench results using udpflood test, with -C option (MSG_CONFIRM flag added to sendto(), to reproduce the n->confirmed dirtying on UDP) 24 threads doing 1.000.000 UDP sendto() on dummy device, 4 runs. before : 2.247s, 2.235s, 2.247s, 2.318s after : 1.884s, 1.905s, 1.891s, 1.895s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- diff --git a/include/net/dst.h b/include/net/dst.h index baf597890064..77f52f7dc823 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -396,11 +396,15 @@ static inline void dst_confirm(struct dst_entry *dst) static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, struct sk_buff *skb) { - struct hh_cache *hh; + const struct hh_cache *hh; + + if (dst->pending_confirm) { + unsigned long now = jiffies; - if (unlikely(dst->pending_confirm)) { - n->confirmed = jiffies; dst->pending_confirm = 0; + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; } hh = &n->hh; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 344d8988842a..0dab173e27da 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -334,18 +334,22 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) } #endif -static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) +static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { unsigned int seq; int hh_len; do { - int hh_alen; - seq = read_seqbegin(&hh->hh_lock); hh_len = hh->hh_len; - hh_alen = HH_DATA_ALIGN(hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + if (likely(hh_len <= HH_DATA_MOD)) { + /* this is inlined by gcc */ + memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); + } else { + int hh_alen = HH_DATA_ALIGN(hh_len); + + memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + } } while (read_seqretry(&hh->hh_lock, seq)); skb_push(skb, hh_len);