From c85b03349640b34f3545503c8429fc43005e9a92 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 May 2016 13:06:21 -0300 Subject: [PATCH] perf core: Separate accounting of contexts and real addresses in a stack trace The perf_sample->ip_callchain->nr value includes all the entries in the ip_callchain->ip[] array, real addresses and PERF_CONTEXT_{KERNEL,USER,etc}, while what the user expects is that what is in the kernel.perf_event_max_stack sysctl or in the upcoming per event perf_event_attr.sample_max_stack knob be honoured in terms of IP addresses in the stack trace. So allocate a bunch of extra entries for contexts, and do the accounting via perf_callchain_entry_ctx struct members. A new sysctl, kernel.perf_event_max_contexts_per_stack is also introduced for investigating possible bugs in the callchain implementation by some arch. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: He Kuang Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-3b4wnqk340c4sg4gwkfdi9yk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/sysctl/kernel.txt | 14 ++++++++++++++ include/linux/perf_event.h | 18 ++++++++++++++++-- include/uapi/linux/perf_event.h | 1 + kernel/events/callchain.c | 10 +++++++++- kernel/sysctl.c | 9 +++++++++ 5 files changed, 49 insertions(+), 3 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index daabdd7ee543..a3683ce2a2f3 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -61,6 +61,7 @@ show up in /proc/sys/kernel: - perf_cpu_time_max_percent - perf_event_paranoid - perf_event_max_stack +- perf_event_max_contexts_per_stack - pid_max - powersave-nap [ PPC only ] - printk @@ -668,6 +669,19 @@ The default value is 127. ============================================================== +perf_event_max_contexts_per_stack: + +Controls maximum number of stack frame context entries for +(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for +instance, when using 'perf record -g' or 'perf trace --call-graph fp'. + +This can only be done when no events are in use that have callchains +enabled, otherwise writing to this file will return -EBUSY. + +The default value is 8. + +============================================================== + pid_max: PID allocation wrap value. When the kernel's next PID value diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2024b14cc2b1..6b87be908790 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -65,6 +65,8 @@ struct perf_callchain_entry_ctx { struct perf_callchain_entry *entry; u32 max_stack; u32 nr; + short contexts; + bool contexts_maxed; }; struct perf_raw_record { @@ -1078,12 +1080,24 @@ extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_contexts_per_stack; -#define perf_callchain_store_context(ctx, context) perf_callchain_store(ctx, context) +static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip) +{ + if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { + struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; + ++ctx->contexts; + return 0; + } else { + ctx->contexts_maxed = true; + return -1; /* no more room, stop walking the stack */ + } +} static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - if (ctx->nr < ctx->max_stack) { + if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { struct perf_callchain_entry *entry = ctx->entry; entry->ip[entry->nr++] = ip; ++ctx->nr; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 43fc8d213472..36ce552cf6a9 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -862,6 +862,7 @@ enum perf_event_type { }; #define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index ca645736a983..179ef4640964 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -19,11 +19,13 @@ struct callchain_cpus_entries { }; int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH; +int sysctl_perf_event_max_contexts_per_stack __read_mostly = PERF_MAX_CONTEXTS_PER_STACK; static inline size_t perf_callchain_entry__sizeof(void) { return (sizeof(struct perf_callchain_entry) + - sizeof(__u64) * sysctl_perf_event_max_stack); + sizeof(__u64) * (sysctl_perf_event_max_stack + + sysctl_perf_event_max_contexts_per_stack)); } static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); @@ -197,6 +199,8 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, ctx.entry = entry; ctx.max_stack = max_stack; ctx.nr = entry->nr = init_nr; + ctx.contexts = 0; + ctx.contexts_maxed = false; if (kernel && !user_mode(regs)) { if (add_mark) @@ -228,6 +232,10 @@ exit_put: return entry; } +/* + * Used for sysctl_perf_event_max_stack and + * sysctl_perf_event_max_contexts_per_stack. + */ int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0ec6907a16b3..bec4c11c47d6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1156,6 +1156,15 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &six_hundred_forty_kb, }, + { + .procname = "perf_event_max_contexts_per_stack", + .data = &sysctl_perf_event_max_contexts_per_stack, + .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack), + .mode = 0644, + .proc_handler = perf_event_max_stack_handler, + .extra1 = &zero, + .extra2 = &one_thousand, + }, #endif #ifdef CONFIG_KMEMCHECK { -- 2.30.2