From a7a2563064e963bc5e3f39f533974f2730c0ff56 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Oct 2018 09:37:54 -0700 Subject: [PATCH] tcp: mitigate scheduling jitter in EDT pacing model In commit fefa569a9d4b ("net_sched: sch_fq: account for schedule/timers drifts") we added a mitigation for scheduling jitter in fq packet scheduler. This patch does the same in TCP stack, now it is using EDT model. Note that this mitigation is valid for both external (fq packet scheduler) or internal TCP pacing. This uses the same strategy than the above commit, allowing a time credit of half the packet currently sent. Consider following case : An skb is sent, after an idle period of 300 usec. The air-time (skb->len/pacing_rate) is 500 usec Instead of setting the pacing timer to now+500 usec, it will use now+min(500/2, 300) -> now+250usec This is like having a token bucket with a depth of half an skb. Tested: tc qdisc replace dev eth0 root pfifo_fast Before netperf -P0 -H remote -- -q 1000000000 # 8000Mbit 540000 262144 262144 10.00 7710.43 After : netperf -P0 -H remote -- -q 1000000000 # 8000 Mbit 540000 262144 262144 10.00 7999.75 # Much closer to 8000Mbit target Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f4aa4109334a..5474c9854f25 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -985,7 +985,8 @@ static void tcp_internal_pacing(struct sock *sk) sock_hold(sk); } -static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb) +static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, + u64 prior_wstamp) { struct tcp_sock *tp = tcp_sk(sk); @@ -998,7 +999,12 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb) * this is a minor annoyance. */ if (rate != ~0UL && rate && tp->data_segs_out >= 10) { - tp->tcp_wstamp_ns += div64_ul((u64)skb->len * NSEC_PER_SEC, rate); + u64 len_ns = div64_ul((u64)skb->len * NSEC_PER_SEC, rate); + u64 credit = tp->tcp_wstamp_ns - prior_wstamp; + + /* take into account OS jitter */ + len_ns -= min_t(u64, len_ns / 2, credit); + tp->tcp_wstamp_ns += len_ns; tcp_internal_pacing(sk); } @@ -1029,6 +1035,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, struct sk_buff *oskb = NULL; struct tcp_md5sig_key *md5; struct tcphdr *th; + u64 prior_wstamp; int err; BUG_ON(!skb || !tcp_skb_pcount(skb)); @@ -1050,7 +1057,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, return -ENOBUFS; } - /* TODO: might take care of jitter here */ + prior_wstamp = tp->tcp_wstamp_ns; tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); skb->skb_mstamp_ns = tp->tcp_wstamp_ns; @@ -1169,7 +1176,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, err = net_xmit_eval(err); } if (!err && oskb) { - tcp_update_skb_after_send(sk, oskb); + tcp_update_skb_after_send(sk, oskb, prior_wstamp); tcp_rate_skb_sent(sk, oskb); } return err; @@ -2321,7 +2328,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp" is used as a start point for the retransmit timer */ - tcp_update_skb_after_send(sk, skb); + tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns); goto repair; /* Skip network transmission */ } @@ -2896,7 +2903,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) } tcp_skb_tsorted_restore(skb); if (!err) { - tcp_update_skb_after_send(sk, skb); + tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns); tcp_rate_skb_sent(sk, skb); } } else { -- 2.30.2