Merge branch 'x86/cleanups' into x86/urgent

author Ingo Molnar <mingo@kernel.org>

Thu, 17 Mar 2016 08:44:57 +0000 (09:44 +0100)

committer Ingo Molnar <mingo@kernel.org>

Thu, 17 Mar 2016 08:44:57 +0000 (09:44 +0100)
author Ingo Molnar <mingo@kernel.org>
Thu, 17 Mar 2016 08:44:57 +0000 (09:44 +0100)
committer Ingo Molnar <mingo@kernel.org>
Thu, 17 Mar 2016 08:44:57 +0000 (09:44 +0100)
diff --cc arch/x86/events/intel/lbr.c

index 69dd11887dd1e1a50d63405f1a508a3ed759e8cc,0000000000000000000000000000000000000000..6c3b7c1780c983627d866584f4c2c6c2379cc65a

mode 100644,000000..100644
--- 1/arch/x86/events/intel/lbr.c
--- /dev/null
+++ b/arch/x86/events/intel/lbr.c
@@@ -1,1062 -1,0 +1,1062 @@@
-  * intruction is not necessarily a branch (in case of interrupt).
+ +#include <linux/perf_event.h>
+ +#include <linux/types.h>
+ +
+ +#include <asm/perf_event.h>
+ +#include <asm/msr.h>
+ +#include <asm/insn.h>
+ +
+ +#include "../perf_event.h"
+ +
+ +enum {
+ +      LBR_FORMAT_32           = 0x00,
+ +      LBR_FORMAT_LIP          = 0x01,
+ +      LBR_FORMAT_EIP          = 0x02,
+ +      LBR_FORMAT_EIP_FLAGS    = 0x03,
+ +      LBR_FORMAT_EIP_FLAGS2   = 0x04,
+ +      LBR_FORMAT_INFO         = 0x05,
+ +      LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
+ +};
+ +
+ +static enum {
+ +      LBR_EIP_FLAGS           = 1,
+ +      LBR_TSX                 = 2,
+ +} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+ +      [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
+ +      [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
+ +};
+ +
+ +/*
+ + * Intel LBR_SELECT bits
+ + * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ + *
+ + * Hardware branch filter (not available on all CPUs)
+ + */
+ +#define LBR_KERNEL_BIT                0 /* do not capture at ring0 */
+ +#define LBR_USER_BIT          1 /* do not capture at ring > 0 */
+ +#define LBR_JCC_BIT           2 /* do not capture conditional branches */
+ +#define LBR_REL_CALL_BIT      3 /* do not capture relative calls */
+ +#define LBR_IND_CALL_BIT      4 /* do not capture indirect calls */
+ +#define LBR_RETURN_BIT                5 /* do not capture near returns */
+ +#define LBR_IND_JMP_BIT               6 /* do not capture indirect jumps */
+ +#define LBR_REL_JMP_BIT               7 /* do not capture relative jumps */
+ +#define LBR_FAR_BIT           8 /* do not capture far branches */
+ +#define LBR_CALL_STACK_BIT    9 /* enable call stack */
+ +
+ +/*
+ + * Following bit only exists in Linux; we mask it out before writing it to
+ + * the actual MSR. But it helps the constraint perf code to understand
+ + * that this is a separate configuration.
+ + */
+ +#define LBR_NO_INFO_BIT              63 /* don't read LBR_INFO. */
+ +
+ +#define LBR_KERNEL    (1 << LBR_KERNEL_BIT)
+ +#define LBR_USER      (1 << LBR_USER_BIT)
+ +#define LBR_JCC               (1 << LBR_JCC_BIT)
+ +#define LBR_REL_CALL  (1 << LBR_REL_CALL_BIT)
+ +#define LBR_IND_CALL  (1 << LBR_IND_CALL_BIT)
+ +#define LBR_RETURN    (1 << LBR_RETURN_BIT)
+ +#define LBR_REL_JMP   (1 << LBR_REL_JMP_BIT)
+ +#define LBR_IND_JMP   (1 << LBR_IND_JMP_BIT)
+ +#define LBR_FAR               (1 << LBR_FAR_BIT)
+ +#define LBR_CALL_STACK        (1 << LBR_CALL_STACK_BIT)
+ +#define LBR_NO_INFO   (1ULL << LBR_NO_INFO_BIT)
+ +
+ +#define LBR_PLM (LBR_KERNEL | LBR_USER)
+ +
+ +#define LBR_SEL_MASK  0x1ff   /* valid bits in LBR_SELECT */
+ +#define LBR_NOT_SUPP  -1      /* LBR filter not supported */
+ +#define LBR_IGN               0       /* ignored */
+ +
+ +#define LBR_ANY                \
+ +      (LBR_JCC        |\
+ +       LBR_REL_CALL   |\
+ +       LBR_IND_CALL   |\
+ +       LBR_RETURN     |\
+ +       LBR_REL_JMP    |\
+ +       LBR_IND_JMP    |\
+ +       LBR_FAR)
+ +
+ +#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
+ +#define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
+ +#define LBR_FROM_FLAG_ABORT    (1ULL << 61)
+ +
+ +/*
+ + * x86 control flow change classification
+ + * x86 control flow changes include branches, interrupts, traps, faults
+ + */
+ +enum {
+ +      X86_BR_NONE             = 0,      /* unknown */
+ +
+ +      X86_BR_USER             = 1 << 0, /* branch target is user */
+ +      X86_BR_KERNEL           = 1 << 1, /* branch target is kernel */
+ +
+ +      X86_BR_CALL             = 1 << 2, /* call */
+ +      X86_BR_RET              = 1 << 3, /* return */
+ +      X86_BR_SYSCALL          = 1 << 4, /* syscall */
+ +      X86_BR_SYSRET           = 1 << 5, /* syscall return */
+ +      X86_BR_INT              = 1 << 6, /* sw interrupt */
+ +      X86_BR_IRET             = 1 << 7, /* return from interrupt */
+ +      X86_BR_JCC              = 1 << 8, /* conditional */
+ +      X86_BR_JMP              = 1 << 9, /* jump */
+ +      X86_BR_IRQ              = 1 << 10,/* hw interrupt or trap or fault */
+ +      X86_BR_IND_CALL         = 1 << 11,/* indirect calls */
+ +      X86_BR_ABORT            = 1 << 12,/* transaction abort */
+ +      X86_BR_IN_TX            = 1 << 13,/* in transaction */
+ +      X86_BR_NO_TX            = 1 << 14,/* not in transaction */
+ +      X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
+ +      X86_BR_CALL_STACK       = 1 << 16,/* call stack */
+ +      X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
+ +};
+ +
+ +#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+ +#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
+ +
+ +#define X86_BR_ANY       \
+ +      (X86_BR_CALL    |\
+ +       X86_BR_RET     |\
+ +       X86_BR_SYSCALL |\
+ +       X86_BR_SYSRET  |\
+ +       X86_BR_INT     |\
+ +       X86_BR_IRET    |\
+ +       X86_BR_JCC     |\
+ +       X86_BR_JMP      |\
+ +       X86_BR_IRQ      |\
+ +       X86_BR_ABORT    |\
+ +       X86_BR_IND_CALL |\
+ +       X86_BR_IND_JMP  |\
+ +       X86_BR_ZERO_CALL)
+ +
+ +#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+ +
+ +#define X86_BR_ANY_CALL                \
+ +      (X86_BR_CALL            |\
+ +       X86_BR_IND_CALL        |\
+ +       X86_BR_ZERO_CALL       |\
+ +       X86_BR_SYSCALL         |\
+ +       X86_BR_IRQ             |\
+ +       X86_BR_INT)
+ +
+ +static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+ +
+ +/*
+ + * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
+ + * otherwise it becomes near impossible to get a reliable stack.
+ + */
+ +
+ +static void __intel_pmu_lbr_enable(bool pmi)
+ +{
+ +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ +      u64 debugctl, lbr_select = 0, orig_debugctl;
+ +
+ +      /*
+ +       * No need to unfreeze manually, as v4 can do that as part
+ +       * of the GLOBAL_STATUS ack.
+ +       */
+ +      if (pmi && x86_pmu.version >= 4)
+ +              return;
+ +
+ +      /*
+ +       * No need to reprogram LBR_SELECT in a PMI, as it
+ +       * did not change.
+ +       */
+ +      if (cpuc->lbr_sel)
+ +              lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
+ +      if (!pmi && cpuc->lbr_sel)
+ +              wrmsrl(MSR_LBR_SELECT, lbr_select);
+ +
+ +      rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ +      orig_debugctl = debugctl;
+ +      debugctl |= DEBUGCTLMSR_LBR;
+ +      /*
+ +       * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
+ +       * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
+ +       * may cause superfluous increase/decrease of LBR_TOS.
+ +       */
+ +      if (!(lbr_select & LBR_CALL_STACK))
+ +              debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+ +      if (orig_debugctl != debugctl)
+ +              wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ +}
+ +
+ +static void __intel_pmu_lbr_disable(void)
+ +{
+ +      u64 debugctl;
+ +
+ +      rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ +      debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ +      wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ +}
+ +
+ +static void intel_pmu_lbr_reset_32(void)
+ +{
+ +      int i;
+ +
+ +      for (i = 0; i < x86_pmu.lbr_nr; i++)
+ +              wrmsrl(x86_pmu.lbr_from + i, 0);
+ +}
+ +
+ +static void intel_pmu_lbr_reset_64(void)
+ +{
+ +      int i;
+ +
+ +      for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ +              wrmsrl(x86_pmu.lbr_from + i, 0);
+ +              wrmsrl(x86_pmu.lbr_to   + i, 0);
+ +              if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ +                      wrmsrl(MSR_LBR_INFO_0 + i, 0);
+ +      }
+ +}
+ +
+ +void intel_pmu_lbr_reset(void)
+ +{
+ +      if (!x86_pmu.lbr_nr)
+ +              return;
+ +
+ +      if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
+ +              intel_pmu_lbr_reset_32();
+ +      else
+ +              intel_pmu_lbr_reset_64();
+ +}
+ +
+ +/*
+ + * TOS = most recently recorded branch
+ + */
+ +static inline u64 intel_pmu_lbr_tos(void)
+ +{
+ +      u64 tos;
+ +
+ +      rdmsrl(x86_pmu.lbr_tos, tos);
+ +      return tos;
+ +}
+ +
+ +enum {
+ +      LBR_NONE,
+ +      LBR_VALID,
+ +};
+ +
+ +static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+ +{
+ +      int i;
+ +      unsigned lbr_idx, mask;
+ +      u64 tos;
+ +
+ +      if (task_ctx->lbr_callstack_users == 0 ||
+ +          task_ctx->lbr_stack_state == LBR_NONE) {
+ +              intel_pmu_lbr_reset();
+ +              return;
+ +      }
+ +
+ +      mask = x86_pmu.lbr_nr - 1;
+ +      tos = task_ctx->tos;
+ +      for (i = 0; i < tos; i++) {
+ +              lbr_idx = (tos - i) & mask;
+ +              wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+ +              wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+ +              if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ +                      wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+ +      }
+ +      wrmsrl(x86_pmu.lbr_tos, tos);
+ +      task_ctx->lbr_stack_state = LBR_NONE;
+ +}
+ +
+ +static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+ +{
+ +      int i;
+ +      unsigned lbr_idx, mask;
+ +      u64 tos;
+ +
+ +      if (task_ctx->lbr_callstack_users == 0) {
+ +              task_ctx->lbr_stack_state = LBR_NONE;
+ +              return;
+ +      }
+ +
+ +      mask = x86_pmu.lbr_nr - 1;
+ +      tos = intel_pmu_lbr_tos();
+ +      for (i = 0; i < tos; i++) {
+ +              lbr_idx = (tos - i) & mask;
+ +              rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+ +              rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+ +              if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ +                      rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+ +      }
+ +      task_ctx->tos = tos;
+ +      task_ctx->lbr_stack_state = LBR_VALID;
+ +}
+ +
+ +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+ +{
+ +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ +      struct x86_perf_task_context *task_ctx;
+ +
+ +      /*
+ +       * If LBR callstack feature is enabled and the stack was saved when
+ +       * the task was scheduled out, restore the stack. Otherwise flush
+ +       * the LBR stack.
+ +       */
+ +      task_ctx = ctx ? ctx->task_ctx_data : NULL;
+ +      if (task_ctx) {
+ +              if (sched_in) {
+ +                      __intel_pmu_lbr_restore(task_ctx);
+ +                      cpuc->lbr_context = ctx;
+ +              } else {
+ +                      __intel_pmu_lbr_save(task_ctx);
+ +              }
+ +              return;
+ +      }
+ +
+ +      /*
+ +       * When sampling the branch stack in system-wide mode, it may be
+ +       * necessary to flush the stack on context switch. This happens
+ +       * when the branch stack does not tag its entries with the pid
+ +       * of the current task. Otherwise it becomes impossible to
+ +       * associate a branch entry with a task. This ambiguity is more
+ +       * likely to appear when the branch stack supports priv level
+ +       * filtering and the user sets it to monitor only at the user
+ +       * level (which could be a useful measurement in system-wide
+ +       * mode). In that case, the risk is high of having a branch
+ +       * stack with branches from multiple tasks.
+ +       */
+ +      if (sched_in) {
+ +              intel_pmu_lbr_reset();
+ +              cpuc->lbr_context = ctx;
+ +      }
+ +}
+ +
+ +static inline bool branch_user_callstack(unsigned br_sel)
+ +{
+ +      return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
+ +}
+ +
+ +void intel_pmu_lbr_enable(struct perf_event *event)
+ +{
+ +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ +      struct x86_perf_task_context *task_ctx;
+ +
+ +      if (!x86_pmu.lbr_nr)
+ +              return;
+ +
+ +      /*
+ +       * Reset the LBR stack if we changed task context to
+ +       * avoid data leaks.
+ +       */
+ +      if (event->ctx->task && cpuc->lbr_context != event->ctx) {
+ +              intel_pmu_lbr_reset();
+ +              cpuc->lbr_context = event->ctx;
+ +      }
+ +      cpuc->br_sel = event->hw.branch_reg.reg;
+ +
+ +      if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+ +                                      event->ctx->task_ctx_data) {
+ +              task_ctx = event->ctx->task_ctx_data;
+ +              task_ctx->lbr_callstack_users++;
+ +      }
+ +
+ +      cpuc->lbr_users++;
+ +      perf_sched_cb_inc(event->ctx->pmu);
+ +}
+ +
+ +void intel_pmu_lbr_disable(struct perf_event *event)
+ +{
+ +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ +      struct x86_perf_task_context *task_ctx;
+ +
+ +      if (!x86_pmu.lbr_nr)
+ +              return;
+ +
+ +      if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+ +                                      event->ctx->task_ctx_data) {
+ +              task_ctx = event->ctx->task_ctx_data;
+ +              task_ctx->lbr_callstack_users--;
+ +      }
+ +
+ +      cpuc->lbr_users--;
+ +      WARN_ON_ONCE(cpuc->lbr_users < 0);
+ +      perf_sched_cb_dec(event->ctx->pmu);
+ +
+ +      if (cpuc->enabled && !cpuc->lbr_users) {
+ +              __intel_pmu_lbr_disable();
+ +              /* avoid stale pointer */
+ +              cpuc->lbr_context = NULL;
+ +      }
+ +}
+ +
+ +void intel_pmu_lbr_enable_all(bool pmi)
+ +{
+ +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ +
+ +      if (cpuc->lbr_users)
+ +              __intel_pmu_lbr_enable(pmi);
+ +}
+ +
+ +void intel_pmu_lbr_disable_all(void)
+ +{
+ +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ +
+ +      if (cpuc->lbr_users)
+ +              __intel_pmu_lbr_disable();
+ +}
+ +
+ +static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+ +{
+ +      unsigned long mask = x86_pmu.lbr_nr - 1;
+ +      u64 tos = intel_pmu_lbr_tos();
+ +      int i;
+ +
+ +      for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ +              unsigned long lbr_idx = (tos - i) & mask;
+ +              union {
+ +                      struct {
+ +                              u32 from;
+ +                              u32 to;
+ +                      };
+ +                      u64     lbr;
+ +              } msr_lastbranch;
+ +
+ +              rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+ +
+ +              cpuc->lbr_entries[i].from       = msr_lastbranch.from;
+ +              cpuc->lbr_entries[i].to         = msr_lastbranch.to;
+ +              cpuc->lbr_entries[i].mispred    = 0;
+ +              cpuc->lbr_entries[i].predicted  = 0;
+ +              cpuc->lbr_entries[i].reserved   = 0;
+ +      }
+ +      cpuc->lbr_stack.nr = i;
+ +}
+ +
+ +/*
+ + * Due to lack of segmentation in Linux the effective address (offset)
+ + * is the same as the linear address, allowing us to merge the LIP and EIP
+ + * LBR formats.
+ + */
+ +static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+ +{
+ +      bool need_info = false;
+ +      unsigned long mask = x86_pmu.lbr_nr - 1;
+ +      int lbr_format = x86_pmu.intel_cap.lbr_format;
+ +      u64 tos = intel_pmu_lbr_tos();
+ +      int i;
+ +      int out = 0;
+ +      int num = x86_pmu.lbr_nr;
+ +
+ +      if (cpuc->lbr_sel) {
+ +              need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
+ +              if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+ +                      num = tos;
+ +      }
+ +
+ +      for (i = 0; i < num; i++) {
+ +              unsigned long lbr_idx = (tos - i) & mask;
+ +              u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+ +              int skip = 0;
+ +              u16 cycles = 0;
+ +              int lbr_flags = lbr_desc[lbr_format];
+ +
+ +              rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
+ +              rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
+ +
+ +              if (lbr_format == LBR_FORMAT_INFO && need_info) {
+ +                      u64 info;
+ +
+ +                      rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
+ +                      mis = !!(info & LBR_INFO_MISPRED);
+ +                      pred = !mis;
+ +                      in_tx = !!(info & LBR_INFO_IN_TX);
+ +                      abort = !!(info & LBR_INFO_ABORT);
+ +                      cycles = (info & LBR_INFO_CYCLES);
+ +              }
+ +              if (lbr_flags & LBR_EIP_FLAGS) {
+ +                      mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ +                      pred = !mis;
+ +                      skip = 1;
+ +              }
+ +              if (lbr_flags & LBR_TSX) {
+ +                      in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ +                      abort = !!(from & LBR_FROM_FLAG_ABORT);
+ +                      skip = 3;
+ +              }
+ +              from = (u64)((((s64)from) << skip) >> skip);
+ +
+ +              /*
+ +               * Some CPUs report duplicated abort records,
+ +               * with the second entry not having an abort bit set.
+ +               * Skip them here. This loop runs backwards,
+ +               * so we need to undo the previous record.
+ +               * If the abort just happened outside the window
+ +               * the extra entry cannot be removed.
+ +               */
+ +              if (abort && x86_pmu.lbr_double_abort && out > 0)
+ +                      out--;
+ +
+ +              cpuc->lbr_entries[out].from      = from;
+ +              cpuc->lbr_entries[out].to        = to;
+ +              cpuc->lbr_entries[out].mispred   = mis;
+ +              cpuc->lbr_entries[out].predicted = pred;
+ +              cpuc->lbr_entries[out].in_tx     = in_tx;
+ +              cpuc->lbr_entries[out].abort     = abort;
+ +              cpuc->lbr_entries[out].cycles    = cycles;
+ +              cpuc->lbr_entries[out].reserved  = 0;
+ +              out++;
+ +      }
+ +      cpuc->lbr_stack.nr = out;
+ +}
+ +
+ +void intel_pmu_lbr_read(void)
+ +{
+ +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ +
+ +      if (!cpuc->lbr_users)
+ +              return;
+ +
+ +      if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
+ +              intel_pmu_lbr_read_32(cpuc);
+ +      else
+ +              intel_pmu_lbr_read_64(cpuc);
+ +
+ +      intel_pmu_lbr_filter(cpuc);
+ +}
+ +
+ +/*
+ + * SW filter is used:
+ + * - in case there is no HW filter
+ + * - in case the HW filter has errata or limitations
+ + */
+ +static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+ +{
+ +      u64 br_type = event->attr.branch_sample_type;
+ +      int mask = 0;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_USER)
+ +              mask |= X86_BR_USER;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+ +              mask |= X86_BR_KERNEL;
+ +
+ +      /* we ignore BRANCH_HV here */
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_ANY)
+ +              mask |= X86_BR_ANY;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+ +              mask |= X86_BR_ANY_CALL;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ +              mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ +              mask |= X86_BR_IND_CALL;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
+ +              mask |= X86_BR_ABORT;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
+ +              mask |= X86_BR_IN_TX;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
+ +              mask |= X86_BR_NO_TX;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_COND)
+ +              mask |= X86_BR_JCC;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+ +              if (!x86_pmu_has_lbr_callstack())
+ +                      return -EOPNOTSUPP;
+ +              if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
+ +                      return -EINVAL;
+ +              mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
+ +                      X86_BR_CALL_STACK;
+ +      }
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+ +              mask |= X86_BR_IND_JMP;
+ +
+ +      if (br_type & PERF_SAMPLE_BRANCH_CALL)
+ +              mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+ +      /*
+ +       * stash actual user request into reg, it may
+ +       * be used by fixup code for some CPUs
+ +       */
+ +      event->hw.branch_reg.reg = mask;
+ +      return 0;
+ +}
+ +
+ +/*
+ + * setup the HW LBR filter
+ + * Used only when available, may not be enough to disambiguate
+ + * all branches, may need the help of the SW filter
+ + */
+ +static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+ +{
+ +      struct hw_perf_event_extra *reg;
+ +      u64 br_type = event->attr.branch_sample_type;
+ +      u64 mask = 0, v;
+ +      int i;
+ +
+ +      for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+ +              if (!(br_type & (1ULL << i)))
+ +                      continue;
+ +
+ +              v = x86_pmu.lbr_sel_map[i];
+ +              if (v == LBR_NOT_SUPP)
+ +                      return -EOPNOTSUPP;
+ +
+ +              if (v != LBR_IGN)
+ +                      mask |= v;
+ +      }
+ +
+ +      reg = &event->hw.branch_reg;
+ +      reg->idx = EXTRA_REG_LBR;
+ +
+ +      /*
+ +       * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
+ +       * in suppress mode. So LBR_SELECT should be set to
+ +       * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+ +       */
+ +      reg->config = mask ^ x86_pmu.lbr_sel_mask;
+ +
+ +      if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
+ +          (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
+ +          (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ +              reg->config |= LBR_NO_INFO;
+ +
+ +      return 0;
+ +}
+ +
+ +int intel_pmu_setup_lbr_filter(struct perf_event *event)
+ +{
+ +      int ret = 0;
+ +
+ +      /*
+ +       * no LBR on this PMU
+ +       */
+ +      if (!x86_pmu.lbr_nr)
+ +              return -EOPNOTSUPP;
+ +
+ +      /*
+ +       * setup SW LBR filter
+ +       */
+ +      ret = intel_pmu_setup_sw_lbr_filter(event);
+ +      if (ret)
+ +              return ret;
+ +
+ +      /*
+ +       * setup HW LBR filter, if any
+ +       */
+ +      if (x86_pmu.lbr_sel_map)
+ +              ret = intel_pmu_setup_hw_lbr_filter(event);
+ +
+ +      return ret;
+ +}
+ +
+ +/*
+ + * return the type of control flow change at address "from"
++ * instruction is not necessarily a branch (in case of interrupt).
+ + *
+ + * The branch type returned also includes the priv level of the
+ + * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ + *
+ + * If a branch type is unknown OR the instruction cannot be
+ + * decoded (e.g., text page not present), then X86_BR_NONE is
+ + * returned.
+ + */
+ +static int branch_type(unsigned long from, unsigned long to, int abort)
+ +{
+ +      struct insn insn;
+ +      void *addr;
+ +      int bytes_read, bytes_left;
+ +      int ret = X86_BR_NONE;
+ +      int ext, to_plm, from_plm;
+ +      u8 buf[MAX_INSN_SIZE];
+ +      int is64 = 0;
+ +
+ +      to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+ +      from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+ +
+ +      /*
+ +       * may be zero if the LBR did not fill up after a reset by the time
+ +       * we get a PMU interrupt
+ +       */
+ +      if (from == 0 || to == 0)
+ +              return X86_BR_NONE;
+ +
+ +      if (abort)
+ +              return X86_BR_ABORT | to_plm;
+ +
+ +      if (from_plm == X86_BR_USER) {
+ +              /*
+ +               * can happen if measuring at the user level only
+ +               * and we interrupt in a kernel thread, e.g., idle.
+ +               */
+ +              if (!current->mm)
+ +                      return X86_BR_NONE;
+ +
+ +              /* may fail if text not present */
+ +              bytes_left = copy_from_user_nmi(buf, (void __user *)from,
+ +                                              MAX_INSN_SIZE);
+ +              bytes_read = MAX_INSN_SIZE - bytes_left;
+ +              if (!bytes_read)
+ +                      return X86_BR_NONE;
+ +
+ +              addr = buf;
+ +      } else {
+ +              /*
+ +               * The LBR logs any address in the IP, even if the IP just
+ +               * faulted. This means userspace can control the from address.
+ +               * Ensure we don't blindly read any address by validating it is
+ +               * a known text address.
+ +               */
+ +              if (kernel_text_address(from)) {
+ +                      addr = (void *)from;
+ +                      /*
+ +                       * Assume we can get the maximum possible size
+ +                       * when grabbing kernel data.  This is not
+ +                       * _strictly_ true since we could possibly be
+ +                       * executing up next to a memory hole, but
+ +                       * it is very unlikely to be a problem.
+ +                       */
+ +                      bytes_read = MAX_INSN_SIZE;
+ +              } else {
+ +                      return X86_BR_NONE;
+ +              }
+ +      }
+ +
+ +      /*
+ +       * decoder needs to know the ABI especially
+ +       * on 64-bit systems running 32-bit apps
+ +       */
+ +#ifdef CONFIG_X86_64
+ +      is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+ +#endif
+ +      insn_init(&insn, addr, bytes_read, is64);
+ +      insn_get_opcode(&insn);
+ +      if (!insn.opcode.got)
+ +              return X86_BR_ABORT;
+ +
+ +      switch (insn.opcode.bytes[0]) {
+ +      case 0xf:
+ +              switch (insn.opcode.bytes[1]) {
+ +              case 0x05: /* syscall */
+ +              case 0x34: /* sysenter */
+ +                      ret = X86_BR_SYSCALL;
+ +                      break;
+ +              case 0x07: /* sysret */
+ +              case 0x35: /* sysexit */
+ +                      ret = X86_BR_SYSRET;
+ +                      break;
+ +              case 0x80 ... 0x8f: /* conditional */
+ +                      ret = X86_BR_JCC;
+ +                      break;
+ +              default:
+ +                      ret = X86_BR_NONE;
+ +              }
+ +              break;
+ +      case 0x70 ... 0x7f: /* conditional */
+ +              ret = X86_BR_JCC;
+ +              break;
+ +      case 0xc2: /* near ret */
+ +      case 0xc3: /* near ret */
+ +      case 0xca: /* far ret */
+ +      case 0xcb: /* far ret */
+ +              ret = X86_BR_RET;
+ +              break;
+ +      case 0xcf: /* iret */
+ +              ret = X86_BR_IRET;
+ +              break;
+ +      case 0xcc ... 0xce: /* int */
+ +              ret = X86_BR_INT;
+ +              break;
+ +      case 0xe8: /* call near rel */
+ +              insn_get_immediate(&insn);
+ +              if (insn.immediate1.value == 0) {
+ +                      /* zero length call */
+ +                      ret = X86_BR_ZERO_CALL;
+ +                      break;
+ +              }
+ +      case 0x9a: /* call far absolute */
+ +              ret = X86_BR_CALL;
+ +              break;
+ +      case 0xe0 ... 0xe3: /* loop jmp */
+ +              ret = X86_BR_JCC;
+ +              break;
+ +      case 0xe9 ... 0xeb: /* jmp */
+ +              ret = X86_BR_JMP;
+ +              break;
+ +      case 0xff: /* call near absolute, call far absolute ind */
+ +              insn_get_modrm(&insn);
+ +              ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+ +              switch (ext) {
+ +              case 2: /* near ind call */
+ +              case 3: /* far ind call */
+ +                      ret = X86_BR_IND_CALL;
+ +                      break;
+ +              case 4:
+ +              case 5:
+ +                      ret = X86_BR_IND_JMP;
+ +                      break;
+ +              }
+ +              break;
+ +      default:
+ +              ret = X86_BR_NONE;
+ +      }
+ +      /*
+ +       * interrupts, traps, faults (and thus ring transition) may
+ +       * occur on any instructions. Thus, to classify them correctly,
+ +       * we need to first look at the from and to priv levels. If they
+ +       * are different and to is in the kernel, then it indicates
+ +       * a ring transition. If the from instruction is not a ring
+ +       * transition instr (syscall, sysenter, int), then it means
+ +       * it was an irq, trap or fault.
+ +       *
+ +       * we have no way of detecting kernel to kernel faults.
+ +       */
+ +      if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+ +          && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+ +              ret = X86_BR_IRQ;
+ +
+ +      /*
+ +       * branch priv level determined by target as
+ +       * is done by HW when LBR_SELECT is implemented
+ +       */
+ +      if (ret != X86_BR_NONE)
+ +              ret |= to_plm;
+ +
+ +      return ret;
+ +}
+ +
+ +/*
+ + * implement actual branch filter based on user demand.
+ + * Hardware may not exactly satisfy that request, thus
+ + * we need to inspect opcodes. Mismatched branches are
+ + * discarded. Therefore, the number of branches returned
+ + * in PERF_SAMPLE_BRANCH_STACK sample may vary.
+ + */
+ +static void
+ +intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+ +{
+ +      u64 from, to;
+ +      int br_sel = cpuc->br_sel;
+ +      int i, j, type;
+ +      bool compress = false;
+ +
+ +      /* if sampling all branches, then nothing to filter */
+ +      if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+ +              return;
+ +
+ +      for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+ +
+ +              from = cpuc->lbr_entries[i].from;
+ +              to = cpuc->lbr_entries[i].to;
+ +
+ +              type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+ +              if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+ +                      if (cpuc->lbr_entries[i].in_tx)
+ +                              type |= X86_BR_IN_TX;
+ +                      else
+ +                              type |= X86_BR_NO_TX;
+ +              }
+ +
+ +              /* if type does not correspond, then discard */
+ +              if (type == X86_BR_NONE || (br_sel & type) != type) {
+ +                      cpuc->lbr_entries[i].from = 0;
+ +                      compress = true;
+ +              }
+ +      }
+ +
+ +      if (!compress)
+ +              return;
+ +
+ +      /* remove all entries with from=0 */
+ +      for (i = 0; i < cpuc->lbr_stack.nr; ) {
+ +              if (!cpuc->lbr_entries[i].from) {
+ +                      j = i;
+ +                      while (++j < cpuc->lbr_stack.nr)
+ +                              cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+ +                      cpuc->lbr_stack.nr--;
+ +                      if (!cpuc->lbr_entries[i].from)
+ +                              continue;
+ +              }
+ +              i++;
+ +      }
+ +}
+ +
+ +/*
+ + * Map interface branch filters (array index) onto LBR filters (value): NHM/WSM
+ + */
+ +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+ +      [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+ +      [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+ +      [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+ +      [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+ +      [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
+ +                                              | LBR_IND_JMP | LBR_FAR,
+ +      /*
+ +       * NHM/WSM erratum: REL_JMP and IND_JMP must be included to get CALL branches
+ +       */
+ +      [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
+ +       LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+ +      /*
+ +       * NHM/WSM erratum: IND_JMP must be included to capture IND_CALL
+ +       */
+ +      [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
+ +      [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
+ +      [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+ +};   /* shifts with no entry above (e.g. CALL, CALL_STACK) are left at 0 */
+ +
+ +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { /* installed by the snb and knl init routines */
+ +      [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+ +      [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+ +      [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+ +      [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+ +      [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
+ +      [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
+ +                                              | LBR_FAR,
+ +      [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
+ +      [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
+ +      [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
+ +      [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
+ +};   /* no CALL_STACK entry: that slot is left at 0 */
+ +
+ +static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { /* installed by the hsw and skl init routines */
+ +      [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+ +      [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+ +      [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+ +      [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+ +      [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
+ +      [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
+ +                                              | LBR_FAR,
+ +      [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
+ +      [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
+ +      [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
+ +                                              | LBR_RETURN | LBR_CALL_STACK, /* the nhm/snb maps have no such entry */
+ +      [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
+ +      [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
+ +};
+ +
+ +/* core: 4-entry LBR stack, no LBR_SELECT mask/map is set up here */
+ +void __init intel_pmu_lbr_init_core(void)
+ +{
+ +      /* MSR base regs */
+ +      x86_pmu.lbr_tos  = MSR_LBR_TOS;
+ +      x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ +      x86_pmu.lbr_to   = MSR_LBR_CORE_TO;
+ +
+ +      /* hardware stack size */
+ +      x86_pmu.lbr_nr   = 4;
+ +
+ +      /* There is no HW filter; the SW branch filter compensates. */
+ +      pr_cont("4-deep LBR, ");
+ +}
+ +
+ +/* nehalem/westmere: 16-entry LBR stack filtered through nhm_lbr_sel_map */
+ +void __init intel_pmu_lbr_init_nhm(void)
+ +{
+ +      x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+ +      x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ +
+ +      x86_pmu.lbr_tos  = MSR_LBR_TOS;
+ +      x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ +      x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+ +      x86_pmu.lbr_nr   = 16;
+ +
+ +      /*
+ +       * The SW branch filter is still used on top of LBR_SELECT:
+ +       * - to work around the LBR_SEL errata noted in nhm_lbr_sel_map
+ +       * - to support syscall, sysret capture.
+ +       *   That requires LBR_FAR, which in turn means far
+ +       *   jmps need to be filtered out
+ +       */
+ +      pr_cont("16-deep LBR, ");
+ +}
+ +
+ +/* sandy bridge: 16-entry LBR stack filtered through snb_lbr_sel_map */
+ +void __init intel_pmu_lbr_init_snb(void)
+ +{
+ +      x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+ +      x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ +
+ +      x86_pmu.lbr_tos  = MSR_LBR_TOS;
+ +      x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ +      x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+ +      x86_pmu.lbr_nr   = 16;
+ +
+ +      /*
+ +       * The SW branch filter is still used on top of LBR_SELECT
+ +       * to support syscall, sysret capture.
+ +       * That requires LBR_FAR, which in turn means far
+ +       * jmps need to be filtered out
+ +       */
+ +      pr_cont("16-deep LBR, ");
+ +}
+ +
+ +/* haswell -- NOTE(review): not marked __init, unlike core/nhm/snb/atom; confirm intended */
+ +void intel_pmu_lbr_init_hsw(void)
+ +{
+ +      x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+ +      x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ +
+ +      x86_pmu.lbr_tos  = MSR_LBR_TOS;
+ +      x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ +      x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+ +      x86_pmu.lbr_nr   = 16;
+ +
+ +      pr_cont("16-deep LBR, ");
+ +}
+ +
+ +/* skylake: 32-entry LBR stack (== MAX_LBR_ENTRIES), filtered through hsw_lbr_sel_map */
+ +void __init intel_pmu_lbr_init_skl(void)
+ +{
+ +      x86_pmu.lbr_nr   = 32;
+ +      x86_pmu.lbr_tos  = MSR_LBR_TOS;
+ +      x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ +      x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+ +
+ +      x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ +      x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+ +
+ +      /*
+ +       * SW branch filter usage:
+ +       * - support syscall, sysret capture.
+ +       *   That requires LBR_FAR but that means far
+ +       *   jmps need to be filtered out
+ +       */
+ +      pr_cont("32-deep LBR, ");
+ +}
+ +
+ +/* atom: 8-entry LBR stack, left unconfigured on steppings hit by the erratum */
+ +void __init intel_pmu_lbr_init_atom(void)
+ +{
+ +      /*
+ +       * On model 28, only steppings 10 and up seem to have
+ +       * an operational LBR which can freeze on PMU interrupt;
+ +       * older steppings return without touching the LBR fields.
+ +       */
+ +      if (boot_cpu_data.x86_model == 28 &&
+ +          boot_cpu_data.x86_mask < 10) {
+ +              pr_cont("LBR disabled due to erratum");
+ +              return;
+ +      }
+ +
+ +      /* MSR base regs */
+ +      x86_pmu.lbr_tos  = MSR_LBR_TOS;
+ +      x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ +      x86_pmu.lbr_to   = MSR_LBR_CORE_TO;
+ +
+ +      /* hardware stack size */
+ +      x86_pmu.lbr_nr   = 8;
+ +
+ +      /* There is no HW filter; the SW branch filter compensates. */
+ +      pr_cont("8-deep LBR, ");
+ +}
+ +
+ +/* Knights Landing: reuses snb_lbr_sel_map -- NOTE(review): not marked __init; confirm intended */
+ +void intel_pmu_lbr_init_knl(void)
+ +{
+ +      x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+ +      x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ +
+ +      x86_pmu.lbr_tos  = MSR_LBR_TOS;
+ +      x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ +      x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+ +      x86_pmu.lbr_nr   = 8;
+ +
+ +      pr_cont("8-deep LBR, ");
+ +}
diff --cc arch/x86/events/perf_event.h

index 68155cafa8a13d88907493a4bf32f0e083189b73,0000000000000000000000000000000000000000..ba6ef18528c906444049587506535115687ce01c

mode 100644,000000..100644
--- 1/arch/x86/events/perf_event.h
--- /dev/null
+++ b/arch/x86/events/perf_event.h
@@@ -1,960 -1,0 +1,960 @@@
-  * will increase scheduling cycles for an over-commited system
+ +/*
+ + * Performance events x86 architecture header
+ + *
+ + *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ + *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ + *  Copyright (C) 2009 Jaswinder Singh Rajput
+ + *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ + *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ + *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ + *  Copyright (C) 2009 Google, Inc., Stephane Eranian
+ + *
+ + *  For licencing details see kernel-base/COPYING
+ + */
+ +
+ +#include <linux/perf_event.h>
+ +
+ +/* To enable MSR tracing please use the generic trace points. */
+ +
+ +/*
+ + *          |   NHM/WSM    |      SNB     |
+ + * register -------------------------------
+ + *          |  HT  | no HT |  HT  | no HT |
+ + *-----------------------------------------
+ + * offcore  | core | core  | cpu  | core  |
+ + * lbr_sel  | core | core  | cpu  | core  |
+ + * ld_lat   | cpu  | core  | cpu  | core  |
+ + *-----------------------------------------
+ + *
+ + * Given that there is a small number of shared regs,
+ + * we can pre-allocate their slot in the per-cpu
+ + * per-core reg tables.
+ + */
+ +enum extra_reg_type { /* slot index into intel_shared_regs.regs[]; also extra_reg.idx */
+ +      EXTRA_REG_NONE  = -1,   /* not used */
+ +
+ +      EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
+ +      EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
+ +      EXTRA_REG_LBR   = 2,    /* lbr_select */
+ +      EXTRA_REG_LDLAT = 3,    /* ld_lat_threshold */
+ +      EXTRA_REG_FE    = 4,    /* fe_* */
+ +
+ +      EXTRA_REG_MAX           /* number of entries needed; sizes intel_shared_regs.regs[] */
+ +};
+ +
+ +struct event_constraint { /* filled in by __EVENT_CONSTRAINT(c, n, m, w, o, f) and its wrappers */
+ +      union {
+ +              unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; /* counter mask as a bitmap */
+ +              u64             idxmsk64;       /* same storage as one u64; macro arg n */
+ +      };
+ +      u64     code;           /* macro arg c: the event code (plus umask for the UEVENT variants) */
+ +      u64     cmask;          /* macro arg m; presumably the config bits compared against code -- confirm */
+ +      int     weight;         /* HWEIGHT(n) in the macros; -1 marks EVENT_CONSTRAINT_END */
+ +      int     overlap;        /* set to 1 by EVENT_CONSTRAINT_OVERLAP(), 0 by the other macros here */
+ +      int     flags;          /* PERF_X86_EVENT_* bits, 0 when the macro passes none */
+ +};
+ +/*
+ + * struct hw_perf_event.flags flags
+ + */
+ +#define PERF_X86_EVENT_PEBS_LDLAT     0x0001 /* ld+ldlat data address sampling */
+ +#define PERF_X86_EVENT_PEBS_ST                0x0002 /* st data address sampling */
+ +#define PERF_X86_EVENT_PEBS_ST_HSW    0x0004 /* haswell style datala, store */
+ +#define PERF_X86_EVENT_COMMITTED      0x0008 /* event passed commit_txn */
+ +#define PERF_X86_EVENT_PEBS_LD_HSW    0x0010 /* haswell style datala, load */
+ +#define PERF_X86_EVENT_PEBS_NA_HSW    0x0020 /* haswell style datala, unknown */
+ +#define PERF_X86_EVENT_EXCL           0x0040 /* HT exclusivity on counter */
+ +#define PERF_X86_EVENT_DYNAMIC                0x0080 /* dynamic alloc'd constraint */
+ +#define PERF_X86_EVENT_RDPMC_ALLOWED  0x0100 /* grant rdpmc permission */
+ +#define PERF_X86_EVENT_EXCL_ACCT      0x0200 /* accounted EXCL event */
+ +#define PERF_X86_EVENT_AUTO_RELOAD    0x0400 /* use PEBS auto-reload */
+ +#define PERF_X86_EVENT_FREERUNNING    0x0800 /* use freerunning PEBS */
+ +
+ +
+ +struct amd_nb {
+ +      int nb_id;  /* NorthBridge id */
+ +      int refcnt; /* reference count */
+ +      struct perf_event *owners[X86_PMC_IDX_MAX];
+ +      struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+ +};
+ +
+ +/* The maximal number of PEBS events: */
+ +#define MAX_PEBS_EVENTS               8
+ +
+ +/*
+ + * Flags PEBS can handle without a PMI.
+ + *
+ + * TID can only be handled by flushing at context switch.
+ + *
+ + */
+ +#define PEBS_FREERUNNING_FLAGS \
+ +      (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+ +      PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+ +      PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+ +      PERF_SAMPLE_TRANSACTION)
+ +
+ +/*
+ + * A debug store configuration.
+ + *
+ + * We only support architectures that use 64bit fields.
+ + */
+ +struct debug_store {
+ +      u64     bts_buffer_base;
+ +      u64     bts_index;
+ +      u64     bts_absolute_maximum;
+ +      u64     bts_interrupt_threshold;
+ +      u64     pebs_buffer_base;
+ +      u64     pebs_index;
+ +      u64     pebs_absolute_maximum;
+ +      u64     pebs_interrupt_threshold;
+ +      u64     pebs_event_reset[MAX_PEBS_EVENTS];
+ +};
+ +
+ +/*
+ + * Per register state.
+ + */
+ +struct er_account {
+ +      raw_spinlock_t          lock;   /* per-core: protect structure */
+ +      u64                 config;     /* extra MSR config */
+ +      u64                 reg;        /* extra MSR number */
+ +      atomic_t            ref;        /* reference count */
+ +};
+ +
+ +/*
+ + * Per core/cpu state
+ + *
+ + * Used to coordinate shared registers between HT threads or
+ + * among events on a single PMU.
+ + */
+ +struct intel_shared_regs {
+ +      struct er_account       regs[EXTRA_REG_MAX];
+ +      int                     refcnt;         /* per-core: #HT threads */
+ +      unsigned                core_id;        /* per-core: core id */
+ +};
+ +
+ +enum intel_excl_state_type {
+ +      INTEL_EXCL_UNUSED    = 0, /* counter is unused */
+ +      INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
+ +      INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+ +};
+ +
+ +struct intel_excl_states {
+ +      enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+ +      bool sched_started; /* true if scheduling has started */
+ +};
+ +
+ +struct intel_excl_cntrs {
+ +      raw_spinlock_t  lock;
+ +
+ +      struct intel_excl_states states[2]; /* presumably one per HT sibling (cf. excl_thread_id: 0 or 1) -- confirm */
+ +
+ +      union { /* the two u16 slots share storage with the single u32 */
+ +              u16     has_exclusive[2];
+ +              u32     exclusive_present;      /* nonzero iff either has_exclusive[] slot is nonzero */
+ +      };
+ +
+ +      int             refcnt;         /* per-core: #HT threads */
+ +      unsigned        core_id;        /* per-core: core id */
+ +};
+ +
+ +#define MAX_LBR_ENTRIES               32
+ +
+ +enum {
+ +      X86_PERF_KFREE_SHARED = 0,
+ +      X86_PERF_KFREE_EXCL   = 1,
+ +      X86_PERF_KFREE_MAX
+ +};
+ +
+ +struct cpu_hw_events {
+ +      /*
+ +       * Generic x86 PMC bits
+ +       */
+ +      struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
+ +      unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ +      unsigned long           running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ +      int                     enabled;
+ +
+ +      int                     n_events; /* the # of events in the below arrays */
+ +      int                     n_added;  /* the # last events in the below arrays;
+ +                                           they've never been enabled yet */
+ +      int                     n_txn;    /* the # last events in the below arrays;
+ +                                           added in the current transaction */
+ +      int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+ +      u64                     tags[X86_PMC_IDX_MAX];
+ +
+ +      struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+ +      struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
+ +
+ +      int                     n_excl; /* the number of exclusive events */
+ +
+ +      unsigned int            txn_flags;
+ +      int                     is_fake;
+ +
+ +      /*
+ +       * Intel DebugStore bits
+ +       */
+ +      struct debug_store      *ds;
+ +      u64                     pebs_enabled;
+ +
+ +      /*
+ +       * Intel LBR bits
+ +       */
+ +      int                             lbr_users;
+ +      void                            *lbr_context;
+ +      struct perf_branch_stack        lbr_stack;
+ +      struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
+ +      struct er_account               *lbr_sel;
+ +      u64                             br_sel;
+ +
+ +      /*
+ +       * Intel host/guest exclude bits
+ +       */
+ +      u64                             intel_ctrl_guest_mask;
+ +      u64                             intel_ctrl_host_mask;
+ +      struct perf_guest_switch_msr    guest_switch_msrs[X86_PMC_IDX_MAX];
+ +
+ +      /*
+ +       * Intel checkpoint mask
+ +       */
+ +      u64                             intel_cp_status;
+ +
+ +      /*
+ +       * manage shared (per-core, per-cpu) registers
+ +       * used on Intel NHM/WSM/SNB
+ +       */
+ +      struct intel_shared_regs        *shared_regs;
+ +      /*
+ +       * manage exclusive counter access between hyperthread
+ +       */
+ +      struct event_constraint *constraint_list; /* in enable order */
+ +      struct intel_excl_cntrs         *excl_cntrs;
+ +      int excl_thread_id; /* 0 or 1 */
+ +
+ +      /*
+ +       * AMD specific bits
+ +       */
+ +      struct amd_nb                   *amd_nb;
+ +      /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
+ +      u64                             perf_ctr_virt_mask;
+ +
+ +      void                            *kfree_on_online[X86_PERF_KFREE_MAX];
+ +};
+ +
+ +#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
+ +      { .idxmsk64 = (n) },            /* counter mask */              \
+ +      .code = (c),                    \
+ +      .cmask = (m),                   \
+ +      .weight = (w),                  /* wrappers pass HWEIGHT(n) */  \
+ +      .overlap = (o),                 \
+ +      .flags = (f),                   /* PERF_X86_EVENT_* bits */     \
+ +}
+ +
+ +#define EVENT_CONSTRAINT(c, n, m)     \
+ +      __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
+ +
+ +#define INTEL_EXCLEVT_CONSTRAINT(c, n)        \
+ +      __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
+ +                         0, PERF_X86_EVENT_EXCL)
+ +
+ +/*
+ + * The overlap flag marks event constraints with overlapping counter
+ + * masks. This is the case if the counter mask of such an event is not
+ + * a subset of any other counter mask of a constraint with an equal or
+ + * higher weight, e.g.:
+ + *
+ + *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ + *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ + *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ + *
+ + * The event scheduler may not select the correct counter in the first
+ + * cycle because it needs to know which subsequent events will be
+ + * scheduled. It may fail to schedule the events then. So we set the
+ + * overlap flag for such constraints to give the scheduler a hint which
+ + * events to select for counter rescheduling.
+ + *
+ + * Care must be taken as the rescheduling algorithm is O(n!) which
++ * will increase scheduling cycles for an over-committed system
+ + * dramatically.  The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ + * and its counter masks must be kept at a minimum.
+ + */
+ +#define EVENT_CONSTRAINT_OVERLAP(c, n, m)     \
+ +      __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
+ +
+ +/*
+ + * Constraint on the Event code.
+ + */
+ +#define INTEL_EVENT_CONSTRAINT(c, n)  \
+ +      EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
+ +
+ +/*
+ + * Constraint on the Event code + UMask + fixed-mask
+ + *
+ + * filter mask to validate fixed counter events.
+ + * the following filters disqualify for fixed counters:
+ + *  - inv
+ + *  - edge
+ + *  - cnt-mask
+ + *  - in_tx
+ + *  - in_tx_checkpointed
+ + *  The other filters are supported by fixed counters.
+ + *  The any-thread option is supported starting with v3.
+ + */
+ +#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
+ +#define FIXED_EVENT_CONSTRAINT(c, n)  /* sets counter mask bit 32 + n */ \
+ +      EVENT_CONSTRAINT(c, (1ULL << (32 + (n))), FIXED_EVENT_FLAGS)
+ +
+ +/*
+ + * Constraint on the Event code + UMask
+ + */
+ +#define INTEL_UEVENT_CONSTRAINT(c, n) \
+ +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+ +
+ +/* Constraint on specific umask bit only + event */
+ +#define INTEL_UBIT_EVENT_CONSTRAINT(c, n)     \
+ +      EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
+ +
+ +/* Like UEVENT_CONSTRAINT, but match flags too */
+ +#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)   \
+ +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+ +
+ +#define INTEL_EXCLUEVT_CONSTRAINT(c, n)       \
+ +      __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ +                         HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
+ +
+ +#define INTEL_PLD_CONSTRAINT(c, n)    \
+ +      __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ +                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+ +
+ +#define INTEL_PST_CONSTRAINT(c, n)    \
+ +      __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ +                        HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+ +
+ +/* Event constraint, but match on all event flags too. */
+ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+ +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+ +
+ +/* Check only flags, but allow all event/umask */
+ +#define INTEL_ALL_EVENT_CONSTRAINT(code, n)   \
+ +      EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
+ +
+ +/* Check flags and event code, and set the HSW store flag */
+ +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
+ +      __EVENT_CONSTRAINT(code, n,                     \
+ +                        ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ +                        HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+ +
+ +/* Check flags and event code, and set the HSW load flag */
+ +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
+ +      __EVENT_CONSTRAINT(code, n,                     \
+ +                        ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ +                        HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+ +
+ +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
+ +      __EVENT_CONSTRAINT(code, n,                     \
+ +                        ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ +                        HWEIGHT(n), 0, \
+ +                        PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+ +
+ +/* Check flags and event code/umask, and set the HSW store flag */
+ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
+ +      __EVENT_CONSTRAINT(code, n,                     \
+ +                        INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ +                        HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+ +
+ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
+ +      __EVENT_CONSTRAINT(code, n,                     \
+ +                        INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ +                        HWEIGHT(n), 0, \
+ +                        PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
+ +
+ +/* Check flags and event code/umask, and set the HSW load flag */
+ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
+ +      __EVENT_CONSTRAINT(code, n,                     \
+ +                        INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ +                        HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+ +
+ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
+ +      __EVENT_CONSTRAINT(code, n,                     \
+ +                        INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ +                        HWEIGHT(n), 0, \
+ +                        PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+ +
+ +/* Check flags and event code/umask, and set the HSW N/A flag */
+ +#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
+ +      __EVENT_CONSTRAINT(code, n,                     \
+ +                        INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ +                        HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
+ +
+ +
+ +/*
+ + * We define the end marker as having a weight of -1
+ + * to enable blacklisting of events using a counter bitmask
+ + * of zero and thus a weight of zero.
+ + * The end marker has a weight that cannot possibly be
+ + * obtained from counting the bits in the bitmask.
+ + */
+ +#define EVENT_CONSTRAINT_END { .weight = -1 }
+ +
+ +/*
+ + * Check for end marker with weight == -1
+ + */
+ +#define for_each_event_constraint(e, c)       \
+ +      for ((e) = (c); (e)->weight != -1; (e)++)
+ +
+ +/*
+ + * Extra registers for specific events.
+ + *
+ + * Some events need large masks and require external MSRs.
+ + * Those extra MSRs end up being shared for all events on
+ + * a PMU and sometimes between PMU of sibling HT threads.
+ + * In either case, the kernel needs to handle conflicting
+ + * accesses to those extra, shared, regs. The data structure
+ + * to manage those registers is stored in struct cpu_hw_events.
+ + */
+ +struct extra_reg {
+ +      unsigned int            event;
+ +      unsigned int            msr;
+ +      u64                     config_mask;
+ +      u64                     valid_mask;
+ +      int                     idx;  /* per_xxx->regs[] reg index */
+ +      bool                    extra_msr_access;
+ +};
+ +
+ +#define EVENT_EXTRA_REG(e, ms, m, vm, i) {    \
+ +      .event = (e),                   \
+ +      .msr = (ms),                    \
+ +      .config_mask = (m),             \
+ +      .valid_mask = (vm),             \
+ +      .idx = EXTRA_REG_##i,           \
+ +      .extra_msr_access = true,       \
+ +      }
+ +
+ +#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)    \
+ +      EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+ +
+ +#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+ +      EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+ +                      ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+ +
+ +#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+ +      INTEL_UEVENT_EXTRA_REG(c, \
+ +                             MSR_PEBS_LD_LAT_THRESHOLD, \
+ +                             0xffff, \
+ +                             LDLAT)
+ +
+ +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
+ +
+ +union perf_capabilities {
+ +      struct {
+ +              u64     lbr_format:6;
+ +              u64     pebs_trap:1;
+ +              u64     pebs_arch_reg:1;
+ +              u64     pebs_format:4;
+ +              u64     smm_freeze:1;
+ +              /*
+ +               * PMU supports separate counter range for writing
+ +               * values > 32bit.
+ +               */
+ +              u64     full_width_write:1;
+ +      };
+ +      u64     capabilities;
+ +};
+ +
+ +struct x86_pmu_quirk { /* node of the singly linked list headed by x86_pmu.quirks */
+ +      struct x86_pmu_quirk *next;     /* previous list head; x86_add_quirk() pushes at the front */
+ +      void (*func)(void);             /* callback registered through x86_add_quirk() */
+ +};
+ +
+ +union x86_pmu_config {
+ +      struct {
+ +              u64 event:8,
+ +                  umask:8,
+ +                  usr:1,
+ +                  os:1,
+ +                  edge:1,
+ +                  pc:1,
+ +                  interrupt:1,
+ +                  __reserved1:1,
+ +                  en:1,
+ +                  inv:1,
+ +                  cmask:8,
+ +                  event2:4,
+ +                  __reserved2:4,
+ +                  go:1,
+ +                  ho:1;
+ +      } bits;
+ +      u64 value;
+ +};
+ +
+ +#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
+ +
+ +enum { /* index into x86_pmu.lbr_exclusive[] */
+ +      x86_lbr_exclusive_lbr,
+ +      x86_lbr_exclusive_bts,
+ +      x86_lbr_exclusive_pt,
+ +      x86_lbr_exclusive_max, /* number of entries; sizes x86_pmu.lbr_exclusive[] */
+ +};
+ +
+ +/*
+ + * struct x86_pmu - generic x86 pmu
+ + */
+ +struct x86_pmu {
+ +      /*
+ +       * Generic x86 PMC bits
+ +       */
+ +      const char      *name;
+ +      int             version;
+ +      int             (*handle_irq)(struct pt_regs *);
+ +      void            (*disable_all)(void);
+ +      void            (*enable_all)(int added);
+ +      void            (*enable)(struct perf_event *);
+ +      void            (*disable)(struct perf_event *);
+ +      int             (*hw_config)(struct perf_event *event);
+ +      int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
+ +      unsigned        eventsel;
+ +      unsigned        perfctr;
+ +      int             (*addr_offset)(int index, bool eventsel);
+ +      int             (*rdpmc_index)(int index);
+ +      u64             (*event_map)(int);
+ +      int             max_events;
+ +      int             num_counters;
+ +      int             num_counters_fixed;
+ +      int             cntval_bits;
+ +      u64             cntval_mask;
+ +      union {
+ +                      unsigned long events_maskl;
+ +                      unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
+ +      };
+ +      int             events_mask_len;
+ +      int             apic;
+ +      u64             max_period;
+ +      struct event_constraint *
+ +                      (*get_event_constraints)(struct cpu_hw_events *cpuc,
+ +                                               int idx,
+ +                                               struct perf_event *event);
+ +
+ +      void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
+ +                                               struct perf_event *event);
+ +
+ +      void            (*start_scheduling)(struct cpu_hw_events *cpuc);
+ +
+ +      void            (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
+ +
+ +      void            (*stop_scheduling)(struct cpu_hw_events *cpuc);
+ +
+ +      struct event_constraint *event_constraints;
+ +      struct x86_pmu_quirk *quirks;
+ +      int             perfctr_second_write;
+ +      bool            late_ack;
+ +      unsigned        (*limit_period)(struct perf_event *event, unsigned l);
+ +
+ +      /*
+ +       * sysfs attrs
+ +       */
+ +      int             attr_rdpmc_broken;
+ +      int             attr_rdpmc;
+ +      struct attribute **format_attrs;
+ +      struct attribute **event_attrs;
+ +
+ +      ssize_t         (*events_sysfs_show)(char *page, u64 config);
+ +      struct attribute **cpu_events;
+ +
+ +      /*
+ +       * CPU Hotplug hooks
+ +       */
+ +      int             (*cpu_prepare)(int cpu);
+ +      void            (*cpu_starting)(int cpu);
+ +      void            (*cpu_dying)(int cpu);
+ +      void            (*cpu_dead)(int cpu);
+ +
+ +      void            (*check_microcode)(void);
+ +      void            (*sched_task)(struct perf_event_context *ctx,
+ +                                    bool sched_in);
+ +
+ +      /*
+ +       * Intel Arch Perfmon v2+
+ +       */
+ +      u64                     intel_ctrl;
+ +      union perf_capabilities intel_cap;
+ +
+ +      /*
+ +       * Intel DebugStore bits
+ +       */
+ +      unsigned int    bts             :1,
+ +                      bts_active      :1,
+ +                      pebs            :1,
+ +                      pebs_active     :1,
+ +                      pebs_broken     :1,
+ +                      pebs_prec_dist  :1;
+ +      int             pebs_record_size;
+ +      int             pebs_buffer_size;
+ +      void            (*drain_pebs)(struct pt_regs *regs);
+ +      struct event_constraint *pebs_constraints;
+ +      void            (*pebs_aliases)(struct perf_event *event);
+ +      int             max_pebs_events;
+ +      unsigned long   free_running_flags;
+ +
+ +      /*
+ +       * Intel LBR
+ +       */
+ +      unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+ +      int             lbr_nr;                    /* hardware stack size */
+ +      u64             lbr_sel_mask;              /* LBR_SELECT valid bits */
+ +      const int       *lbr_sel_map;              /* lbr_select mappings */
+ +      bool            lbr_double_abort;          /* duplicated lbr aborts */
+ +
+ +      /*
+ +       * Intel PT/LBR/BTS are exclusive
+ +       */
+ +      atomic_t        lbr_exclusive[x86_lbr_exclusive_max];
+ +
+ +      /*
+ +       * Extra registers for events
+ +       */
+ +      struct extra_reg *extra_regs;
+ +      unsigned int flags;
+ +
+ +      /*
+ +       * Intel host/guest support (KVM)
+ +       */
+ +      struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+ +};
+ +
+ +struct x86_perf_task_context {
+ +      u64 lbr_from[MAX_LBR_ENTRIES];
+ +      u64 lbr_to[MAX_LBR_ENTRIES];
+ +      u64 lbr_info[MAX_LBR_ENTRIES];
+ +      int tos;
+ +      int lbr_callstack_users;
+ +      int lbr_stack_state;
+ +};
+ +
+ +#define x86_add_quirk(func_)                                          \
+ +do {                                                                  \
+ +      static struct x86_pmu_quirk __quirk __initdata = {              \
+ +              .func = func_,                                          \
+ +      };                                                              \
+ +      __quirk.next = x86_pmu.quirks;                                  \
+ +      x86_pmu.quirks = &__quirk;                                      \
+ +} while (0)
+ +
+ +/*
+ + * x86_pmu flags
+ + */
+ +#define PMU_FL_NO_HT_SHARING  0x1 /* no hyper-threading resource sharing */
+ +#define PMU_FL_HAS_RSP_1      0x2 /* has 2 equivalent offcore_rsp regs   */
+ +#define PMU_FL_EXCL_CNTRS     0x4 /* has exclusive counter requirements  */
+ +#define PMU_FL_EXCL_ENABLED   0x8 /* exclusive counter active */
+ +
+ +#define EVENT_VAR(_id)  event_attr_##_id
+ +#define EVENT_PTR(_id) (&event_attr_##_id.attr.attr) /* parenthesized so a following postfix operator applies to the pointer */
+ +
+ +#define EVENT_ATTR(_name, _id)                                                \
+ +static struct perf_pmu_events_attr EVENT_VAR(_id) = {                 \
+ +      .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+ +      .id             = PERF_COUNT_HW_##_id,                          \
+ +      .event_str      = NULL,                                         \
+ +};
+ +/*
+ + * Define a static perf_pmu_events_attr named event_attr_<v> that carries
+ + * the string str in .event_str and leaves .id at 0. The embedded attribute
+ + * is built with __ATTR() using the name _name, mode 0444,
+ + * events_sysfs_show() as the show callback and no store callback.
+ + */
+ +#define EVENT_ATTR_STR(_name, v, str)                                 \
+ +static struct perf_pmu_events_attr event_attr_##v = {                 \
+ +      .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+ +      .id             = 0,                                            \
+ +      .event_str      = str,                                          \
+ +};
+ +
+ +extern struct x86_pmu x86_pmu __read_mostly;
+ +/*
+ + * True when x86_pmu.lbr_sel_map is set and its
+ + * PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT slot is greater than zero.
+ + * NOTE(review): a positive slot presumably means the PMU supports LBR
+ + * call-stack mode -- confirm against the code that fills lbr_sel_map.
+ + */
+ +static inline bool x86_pmu_has_lbr_callstack(void)
+ +{
+ +      return  x86_pmu.lbr_sel_map &&
+ +              x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
+ +}
+ +
+ +DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+ +
+ +int x86_perf_event_set_period(struct perf_event *event);
+ +
+ +/*
+ + * Generalized hw caching related hw_event table, filled
+ + * in on a per model basis. A value of 0 means
+ + * 'not supported', -1 means 'hw_event makes no sense on
+ + * this CPU', any other value means the raw hw_event
+ + * ID.
+ + */
+ +
+ +#define C(x) PERF_COUNT_HW_CACHE_##x
+ +
+ +extern u64 __read_mostly hw_cache_event_ids
+ +                              [PERF_COUNT_HW_CACHE_MAX]
+ +                              [PERF_COUNT_HW_CACHE_OP_MAX]
+ +                              [PERF_COUNT_HW_CACHE_RESULT_MAX];
+ +extern u64 __read_mostly hw_cache_extra_regs
+ +                              [PERF_COUNT_HW_CACHE_MAX]
+ +                              [PERF_COUNT_HW_CACHE_OP_MAX]
+ +                              [PERF_COUNT_HW_CACHE_RESULT_MAX];
+ +
+ +u64 x86_perf_event_update(struct perf_event *event);
+ +/*
+ + * Address of the event-select register for counter @index:
+ + * x86_pmu.eventsel plus the result of the optional
+ + * x86_pmu.addr_offset(index, true) hook, or plus @index itself when no
+ + * hook is installed.
+ + */
+ +static inline unsigned int x86_pmu_config_addr(int index)
+ +{
+ +      return x86_pmu.eventsel + (x86_pmu.addr_offset ?
+ +                                 x86_pmu.addr_offset(index, true) : index);
+ +}
+ +/*
+ + * Address of the counter register for counter @index: x86_pmu.perfctr
+ + * plus the result of the optional x86_pmu.addr_offset(index, false) hook,
+ + * or plus @index itself when no hook is installed.
+ + */
+ +static inline unsigned int x86_pmu_event_addr(int index)
+ +{
+ +      return x86_pmu.perfctr + (x86_pmu.addr_offset ?
+ +                                x86_pmu.addr_offset(index, false) : index);
+ +}
+ +/*
+ + * Translate counter @index through the optional x86_pmu.rdpmc_index()
+ + * hook; without a hook the index is returned unchanged.
+ + */
+ +static inline int x86_pmu_rdpmc_index(int index)
+ +{
+ +      return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
+ +}
+ +
+ +int x86_add_exclusive(unsigned int what);
+ +
+ +void x86_del_exclusive(unsigned int what);
+ +
+ +int x86_reserve_hardware(void);
+ +
+ +void x86_release_hardware(void);
+ +
+ +void hw_perf_lbr_event_destroy(struct perf_event *event);
+ +
+ +int x86_setup_perfctr(struct perf_event *event);
+ +
+ +int x86_pmu_hw_config(struct perf_event *event);
+ +
+ +void x86_pmu_disable_all(void);
+ +/*
+ + * Program the registers of one event. If hwc->extra_reg.reg is non-zero,
+ + * hwc->extra_reg.config is written to it first. Then hwc->config ORed
+ + * with @enable_mask is written to hwc->config_base, with every bit of
+ + * this CPU's cpu_hw_events.perf_ctr_virt_mask cleared from the value.
+ + */
+ +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+ +                                        u64 enable_mask)
+ +{
+ +      u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
+ +
+ +      if (hwc->extra_reg.reg)
+ +              wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+ +      wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
+ +}
+ +
+ +void x86_pmu_enable_all(int added);
+ +
+ +int perf_assign_events(struct event_constraint **constraints, int n,
+ +                      int wmin, int wmax, int gpmax, int *assign);
+ +int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
+ +
+ +void x86_pmu_stop(struct perf_event *event, int flags);
+ +/*
+ + * Write the event's hwc->config value, unmodified, to hwc->config_base.
+ + * NOTE(review): presumably hwc->config is kept without the enable bit, so
+ + * this write stops the counter -- confirm against the enable path.
+ + */
+ +static inline void x86_pmu_disable_event(struct perf_event *event)
+ +{
+ +      struct hw_perf_event *hwc = &event->hw;
+ +
+ +      wrmsrl(hwc->config_base, hwc->config);
+ +}
+ +
+ +void x86_pmu_enable_event(struct perf_event *event);
+ +
+ +int x86_pmu_handle_irq(struct pt_regs *regs);
+ +
+ +extern struct event_constraint emptyconstraint;
+ +
+ +extern struct event_constraint unconstrained;
+ +/*
+ + * Classify @ip as a kernel address. With CONFIG_X86_32 that means
+ + * strictly above PAGE_OFFSET; otherwise it means the value is negative
+ + * when viewed as a signed long, i.e. its top bit is set.
+ + */
+ +static inline bool kernel_ip(unsigned long ip)
+ +{
+ +#ifdef CONFIG_X86_32
+ +      return ip > PAGE_OFFSET;
+ +#else
+ +      return (long)ip < 0;
+ +#endif
+ +}
+ +
+ +/*
+ + * Not all PMUs provide the right context information to place the reported IP
+ + * into full context. Specifically segment registers are typically not
+ + * supplied.
+ + *
+ + * Assuming the address is a linear address (it is for IBS), we fake the CS and
+ + * vm86 mode using the known zero-based code segment and 'fix up' the registers
+ + * to reflect this.
+ + *
+ + * Intel PEBS/LBR appear to typically provide the effective address, nothing
+ + * much we can do about that but pray and treat it like a linear address.
+ + */
+ +static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
+ +{
+ +      regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;      /* choose CS from the address range of ip */
+ +      if (regs->flags & X86_VM_MASK)                           /* only when a VM-mask bit is set in the saved flags */
+ +              regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);   /* XOR flips both the X86_VM_MASK and PERF_EFLAGS_VM bits */
+ +      regs->ip = ip;
+ +}
+ +
+ +ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
+ +ssize_t intel_event_sysfs_show(char *page, u64 config);
+ +
+ +struct attribute **merge_attr(struct attribute **a, struct attribute **b);
+ +
+ +#ifdef CONFIG_CPU_SUP_AMD
+ +
+ +int amd_pmu_init(void);
+ +
+ +#else /* CONFIG_CPU_SUP_AMD */
+ +/* Stub for builds without CONFIG_CPU_SUP_AMD: does nothing and returns 0. */
+ +static inline int amd_pmu_init(void)
+ +{
+ +      return 0;
+ +}
+ +
+ +#endif /* CONFIG_CPU_SUP_AMD */
+ +
+ +#ifdef CONFIG_CPU_SUP_INTEL
+ +/*
+ + * True when @event's attr.config is PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+ + * frequency mode (attr.freq) is off and the sample period is exactly 1.
+ + * Only attr.config is compared in this helper; attr.type is not checked.
+ + * NOTE(review): judging by the name, such events are the ones handled via
+ + * BTS -- confirm with the callers.
+ + */
+ +static inline bool intel_pmu_has_bts(struct perf_event *event)
+ +{
+ +      if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+ +          !event->attr.freq && event->hw.sample_period == 1)
+ +              return true;
+ +
+ +      return false;
+ +}
+ +
+ +int intel_pmu_save_and_restart(struct perf_event *event);
+ +
+ +struct event_constraint *
+ +x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ +                        struct perf_event *event);
+ +
+ +struct intel_shared_regs *allocate_shared_regs(int cpu);
+ +
+ +int intel_pmu_init(void);
+ +
+ +void init_debug_store_on_cpu(int cpu);
+ +
+ +void fini_debug_store_on_cpu(int cpu);
+ +
+ +void release_ds_buffers(void);
+ +
+ +void reserve_ds_buffers(void);
+ +
+ +extern struct event_constraint bts_constraint;
+ +
+ +void intel_pmu_enable_bts(u64 config);
+ +
+ +void intel_pmu_disable_bts(void);
+ +
+ +int intel_pmu_drain_bts_buffer(void);
+ +
+ +extern struct event_constraint intel_core2_pebs_event_constraints[];
+ +
+ +extern struct event_constraint intel_atom_pebs_event_constraints[];
+ +
+ +extern struct event_constraint intel_slm_pebs_event_constraints[];
+ +
+ +extern struct event_constraint intel_nehalem_pebs_event_constraints[];
+ +
+ +extern struct event_constraint intel_westmere_pebs_event_constraints[];
+ +
+ +extern struct event_constraint intel_snb_pebs_event_constraints[];
+ +
+ +extern struct event_constraint intel_ivb_pebs_event_constraints[];
+ +
+ +extern struct event_constraint intel_hsw_pebs_event_constraints[];
+ +
+ +extern struct event_constraint intel_bdw_pebs_event_constraints[];
+ +
+ +extern struct event_constraint intel_skl_pebs_event_constraints[];
+ +
+ +struct event_constraint *intel_pebs_constraints(struct perf_event *event);
+ +
+ +void intel_pmu_pebs_enable(struct perf_event *event);
+ +
+ +void intel_pmu_pebs_disable(struct perf_event *event);
+ +
+ +void intel_pmu_pebs_enable_all(void);
+ +
+ +void intel_pmu_pebs_disable_all(void);
+ +
+ +void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+ +
+ +void intel_ds_init(void);
+ +
+ +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+ +
+ +void intel_pmu_lbr_reset(void);
+ +
+ +void intel_pmu_lbr_enable(struct perf_event *event);
+ +
+ +void intel_pmu_lbr_disable(struct perf_event *event);
+ +
+ +void intel_pmu_lbr_enable_all(bool pmi);
+ +
+ +void intel_pmu_lbr_disable_all(void);
+ +
+ +void intel_pmu_lbr_read(void);
+ +
+ +void intel_pmu_lbr_init_core(void);
+ +
+ +void intel_pmu_lbr_init_nhm(void);
+ +
+ +void intel_pmu_lbr_init_atom(void);
+ +
+ +void intel_pmu_lbr_init_snb(void);
+ +
+ +void intel_pmu_lbr_init_hsw(void);
+ +
+ +void intel_pmu_lbr_init_skl(void);
+ +
+ +void intel_pmu_lbr_init_knl(void);
+ +
+ +void intel_pmu_pebs_data_source_nhm(void);
+ +
+ +int intel_pmu_setup_lbr_filter(struct perf_event *event);
+ +
+ +void intel_pt_interrupt(void);
+ +
+ +int intel_bts_interrupt(void);
+ +
+ +void intel_bts_enable_local(void);
+ +
+ +void intel_bts_disable_local(void);
+ +
+ +int p4_pmu_init(void);
+ +
+ +int p6_pmu_init(void);
+ +
+ +int knc_pmu_init(void);
+ +
+ +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+ +                        char *page);
+ +/*
+ + * Returns 1 when PMU_FL_EXCL_ENABLED is set in x86_pmu.flags and 0
+ + * otherwise; the !! normalizes the masked value to 0 or 1.
+ + */
+ +static inline int is_ht_workaround_enabled(void)
+ +{
+ +      return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
+ +}
+ +
+ +#else /* CONFIG_CPU_SUP_INTEL */
+ +/*
+ + * Stubs for builds without CONFIG_CPU_SUP_INTEL. The two DS-buffer
+ + * helpers do nothing, intel_pmu_init() returns 0, allocate_shared_regs()
+ + * returns NULL and is_ht_workaround_enabled() returns 0.
+ + */
+ +static inline void reserve_ds_buffers(void)
+ +{
+ +}
+ +
+ +static inline void release_ds_buffers(void)
+ +{
+ +}
+ +
+ +static inline int intel_pmu_init(void)
+ +{
+ +      return 0;
+ +}
+ +
+ +static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
+ +{
+ +      return NULL;
+ +}
+ +
+ +static inline int is_ht_workaround_enabled(void)
+ +{
+ +      return 0;
+ +}
+ +#endif /* CONFIG_CPU_SUP_INTEL */
diff --cc arch/x86/kernel/apic/apic.c
Simple merge
diff --cc arch/x86/kernel/cpu/common.c
Simple merge
diff --cc arch/x86/kernel/kgdb.c
Simple merge
diff --cc arch/x86/kernel/tsc.c
Simple merge
diff --cc arch/x86/kvm/mmu.c
Simple merge
diff --cc arch/x86/kvm/vmx.c
Simple merge
diff --cc arch/x86/kvm/x86.c
Simple merge
diff --cc arch/x86/lib/memset_64.S
Simple merge
diff --cc arch/x86/mm/mpx.c
Simple merge
diff --cc arch/x86/mm/pat.c
Simple merge
author	Ingo Molnar <mingo@kernel.org>
	Thu, 17 Mar 2016 08:44:57 +0000 (09:44 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Thu, 17 Mar 2016 08:44:57 +0000 (09:44 +0100)
		1	2
arch/x86/events/intel/lbr.c	patch \|	diff1 \|	\|	blob \| history
arch/x86/events/perf_event.h	patch \|	diff1 \|	\|	blob \| history
arch/x86/kernel/apic/apic.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/common.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/kgdb.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/tsc.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/vmx.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/lib/memset_64.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/mpx.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/pat.c	patch \|	diff1 \|	diff2 \|	blob \| history