From cb42c9a3ebbbb23448c3f9a25417fae6309b1a92 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 21 Sep 2016 14:38:13 +0100 Subject: [PATCH] sched/core: Add debugging code to catch missing update_rq_clock() calls There's no diagnostic checks for figuring out when we've accidentally missed update_rq_clock() calls. Let's add some by piggybacking on the rq_*pin_lock() wrappers. The idea behind the diagnostic checks is that upon pining rq lock the rq clock should be updated, via update_rq_clock(), before anybody reads the clock with rq_clock() or rq_clock_task(). The exception to this rule is when updates have explicitly been disabled with the rq_clock_skip_update() optimisation. There are some functions that only unpin the rq lock in order to grab some other lock and avoid deadlock. In that case we don't need to update the clock again and the previous diagnostic state can be carried over in rq_repin_lock() by saving the state in the rq_flags context. Since this patch adds a new clock update flag and some already exist in rq::clock_skip_update, that field has now been renamed. An attempt has been made to keep the flag manipulation code small and fast since it's used in the heart of the __schedule() fast path. For the !CONFIG_SCHED_DEBUG case the only object code change (other than addresses) is the following change to reset RQCF_ACT_SKIP inside of __schedule(), - c7 83 38 09 00 00 00 movl $0x0,0x938(%rbx) - 00 00 00 + 83 a3 38 09 00 00 fc andl $0xfffffffc,0x938(%rbx) Suggested-by: Peter Zijlstra Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Frederic Weisbecker Cc: Jan Kara Cc: Linus Torvalds Cc: Luca Abeni Cc: Mel Gorman Cc: Mike Galbraith Cc: Mike Galbraith Cc: Petr Mladek Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yuyang Du Link: http://lkml.kernel.org/r/20160921133813.31976-8-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 11 ++++--- kernel/sched/sched.h | 74 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 75 insertions(+), 10 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d2338927773a..a129b34b8206 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -102,9 +102,12 @@ void update_rq_clock(struct rq *rq) lockdep_assert_held(&rq->lock); - if (rq->clock_skip_update & RQCF_ACT_SKIP) + if (rq->clock_update_flags & RQCF_ACT_SKIP) return; +#ifdef CONFIG_SCHED_DEBUG + rq->clock_update_flags |= RQCF_UPDATED; +#endif delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; if (delta < 0) return; @@ -2889,7 +2892,7 @@ context_switch(struct rq *rq, struct task_struct *prev, rq->prev_mm = oldmm; } - rq->clock_skip_update = 0; + rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); /* * Since the runqueue lock will be released by the next @@ -3364,7 +3367,7 @@ static void __sched notrace __schedule(bool preempt) raw_spin_lock(&rq->lock); rq_pin_lock(rq, &rf); - rq->clock_skip_update <<= 1; /* promote REQ to ACT */ + rq->clock_update_flags <<= 1; /* promote REQ to ACT */ switch_count = &prev->nivcsw; if (!preempt && prev->state) { @@ -3405,7 +3408,7 @@ static void __sched notrace __schedule(bool preempt) trace_sched_switch(preempt, prev, next); rq = context_switch(rq, prev, next, &rf); /* unlocks the rq */ } else { - rq->clock_skip_update = 0; + rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); rq_unpin_lock(rq, &rf); raw_spin_unlock_irq(&rq->lock); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 98e7eee07237..6eeae7ebd99b 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -644,7 +644,7 @@ struct rq { unsigned long next_balance; struct mm_struct *prev_mm; - unsigned int clock_skip_update; + unsigned int clock_update_flags; u64 clock; u64 clock_task; @@ -768,48 +768,110 @@ static inline u64 __rq_clock_broken(struct rq *rq) return READ_ONCE(rq->clock); } +/* + * rq::clock_update_flags bits + * + * %RQCF_REQ_SKIP - will request skipping of clock update on the next + * call to __schedule(). This is an optimisation to avoid + * neighbouring rq clock updates. + * + * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is + * in effect and calls to update_rq_clock() are being ignored. + * + * %RQCF_UPDATED - is a debug flag that indicates whether a call has been + * made to update_rq_clock() since the last time rq::lock was pinned. + * + * If inside of __schedule(), clock_update_flags will have been + * shifted left (a left shift is a cheap operation for the fast path + * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use, + * + * if (rq-clock_update_flags >= RQCF_UPDATED) + * + * to check if %RQCF_UPADTED is set. It'll never be shifted more than + * one position though, because the next rq_unpin_lock() will shift it + * back. + */ +#define RQCF_REQ_SKIP 0x01 +#define RQCF_ACT_SKIP 0x02 +#define RQCF_UPDATED 0x04 + +static inline void assert_clock_updated(struct rq *rq) +{ + /* + * The only reason for not seeing a clock update since the + * last rq_pin_lock() is if we're currently skipping updates. + */ + SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP); +} + static inline u64 rq_clock(struct rq *rq) { lockdep_assert_held(&rq->lock); + assert_clock_updated(rq); + return rq->clock; } static inline u64 rq_clock_task(struct rq *rq) { lockdep_assert_held(&rq->lock); + assert_clock_updated(rq); + return rq->clock_task; } -#define RQCF_REQ_SKIP 0x01 -#define RQCF_ACT_SKIP 0x02 - static inline void rq_clock_skip_update(struct rq *rq, bool skip) { lockdep_assert_held(&rq->lock); if (skip) - rq->clock_skip_update |= RQCF_REQ_SKIP; + rq->clock_update_flags |= RQCF_REQ_SKIP; else - rq->clock_skip_update &= ~RQCF_REQ_SKIP; + rq->clock_update_flags &= ~RQCF_REQ_SKIP; } struct rq_flags { unsigned long flags; struct pin_cookie cookie; +#ifdef CONFIG_SCHED_DEBUG + /* + * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the + * current pin context is stashed here in case it needs to be + * restored in rq_repin_lock(). + */ + unsigned int clock_update_flags; +#endif }; static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) { rf->cookie = lockdep_pin_lock(&rq->lock); + +#ifdef CONFIG_SCHED_DEBUG + rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); + rf->clock_update_flags = 0; +#endif } static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) { +#ifdef CONFIG_SCHED_DEBUG + if (rq->clock_update_flags > RQCF_ACT_SKIP) + rf->clock_update_flags = RQCF_UPDATED; +#endif + lockdep_unpin_lock(&rq->lock, rf->cookie); } static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf) { lockdep_repin_lock(&rq->lock, rf->cookie); + +#ifdef CONFIG_SCHED_DEBUG + /* + * Restore the value we stashed in @rf for this pin context. + */ + rq->clock_update_flags |= rf->clock_update_flags; +#endif } #ifdef CONFIG_NUMA -- 2.30.2