From 8f95b435b62522aed3381aaea920de8d09ccabf3 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 27 Jan 2015 11:53:12 +0100 Subject: [PATCH] perf: Fix move_group() order Jiri reported triggering the new WARN_ON_ONCE in event_sched_out over the weekend: event_sched_out.isra.79+0x2b9/0x2d0 group_sched_out+0x69/0xc0 ctx_sched_out+0x106/0x130 task_ctx_sched_out+0x37/0x70 __perf_install_in_context+0x70/0x1a0 remote_function+0x48/0x60 generic_exec_single+0x15b/0x1d0 smp_call_function_single+0x67/0xa0 task_function_call+0x53/0x80 perf_install_in_context+0x8b/0x110 I think the below should cure this; if we install a group leader it will iterate the (still intact) group list and find its siblings and try and install those too -- even though those still have the old event->ctx -- in the new ctx. Upon installing the first group sibling we'd try and schedule out the group and trigger the above warn. Fix this by installing the group leader last, installing siblings would have no effect, they're not reachable through the group lists and therefore we don't schedule them. Also delay resetting the state until we're absolutely sure the events are quiescent. Reported-by: Jiri Olsa Reported-by: vincent.weaver@maine.edu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150126162639.GA21418@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 56 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 417a96bf3d41..142dbabc1615 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7645,16 +7645,9 @@ SYSCALL_DEFINE5(perf_event_open, perf_remove_from_context(group_leader, false); - /* - * Removing from the context ends up with disabled - * event. What we want here is event in the initial - * startup state, ready to be add into new context. - */ - perf_event__state_init(group_leader); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { perf_remove_from_context(sibling, false); - perf_event__state_init(sibling); put_ctx(gctx); } } else { @@ -7670,13 +7663,31 @@ SYSCALL_DEFINE5(perf_event_open, */ synchronize_rcu(); - perf_install_in_context(ctx, group_leader, group_leader->cpu); - get_ctx(ctx); + /* + * Install the group siblings before the group leader. + * + * Because a group leader will try and install the entire group + * (through the sibling list, which is still in-tact), we can + * end up with siblings installed in the wrong context. + * + * By installing siblings first we NO-OP because they're not + * reachable through the group lists. + */ list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { + perf_event__state_init(sibling); perf_install_in_context(ctx, sibling, sibling->cpu); get_ctx(ctx); } + + /* + * Removing from the context ends up with disabled + * event. What we want here is event in the initial + * startup state, ready to be add into new context. + */ + perf_event__state_init(group_leader); + perf_install_in_context(ctx, group_leader, group_leader->cpu); + get_ctx(ctx); } perf_install_in_context(ctx, event, event->cpu); @@ -7806,8 +7817,35 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) list_add(&event->migrate_entry, &events); } + /* + * Wait for the events to quiesce before re-instating them. + */ synchronize_rcu(); + /* + * Re-instate events in 2 passes. + * + * Skip over group leaders and only install siblings on this first + * pass, siblings will not get enabled without a leader, however a + * leader will enable its siblings, even if those are still on the old + * context. + */ + list_for_each_entry_safe(event, tmp, &events, migrate_entry) { + if (event->group_leader == event) + continue; + + list_del(&event->migrate_entry); + if (event->state >= PERF_EVENT_STATE_OFF) + event->state = PERF_EVENT_STATE_INACTIVE; + account_event_cpu(event, dst_cpu); + perf_install_in_context(dst_ctx, event, dst_cpu); + get_ctx(dst_ctx); + } + + /* + * Once all the siblings are setup properly, install the group leaders + * to make it go. + */ list_for_each_entry_safe(event, tmp, &events, migrate_entry) { list_del(&event->migrate_entry); if (event->state >= PERF_EVENT_STATE_OFF) -- 2.30.2