perf: riscv: preliminary RISC-V support
authorAlan Kao <alankao@andestech.com>
Thu, 19 Apr 2018 23:27:49 +0000 (07:27 +0800)
committerPalmer Dabbelt <palmer@sifive.com>
Mon, 4 Jun 2018 21:02:01 +0000 (14:02 -0700)
This patch provides a basic PMU, riscv_base_pmu, which supports two
general hardware events, instructions and cycles.  Furthermore, this
PMU serves as a reference implementation to ease porting in
the future.

riscv_base_pmu should be able to run on any RISC-V machine that
conforms to the Priv-Spec.  Note that the latest qemu model does not
yet fully implement the behavior required by Priv-Spec 1.10, but a
workaround should be easy with very small fixes.  Please check
https://github.com/riscv/riscv-qemu/pull/115 for future updates.

Cc: Nick Hu <nickhu@andestech.com>
Cc: Greentime Hu <greentime@andestech.com>
Signed-off-by: Alan Kao <alankao@andestech.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
arch/riscv/Kconfig
arch/riscv/include/asm/Kbuild
arch/riscv/include/asm/perf_event.h [new file with mode: 0644]
arch/riscv/kernel/Makefile
arch/riscv/kernel/perf_event.c [new file with mode: 0644]

index cd4fd85fde84e76fa205737076c5a721b9f4c6f4..4495604394e504df8d428b5ecfbe9c8a537f6e64 100644 (file)
@@ -25,6 +25,7 @@ config RISCV
        select HAVE_DMA_API_DEBUG
        select HAVE_DMA_CONTIGUOUS
        select HAVE_GENERIC_DMA_COHERENT
+       select HAVE_PERF_EVENTS
        select IRQ_DOMAIN
        select NO_BOOTMEM
        select RISCV_ISA_A if SMP
@@ -198,6 +199,19 @@ config RISCV_ISA_C
 config RISCV_ISA_A
        def_bool y
 
+menu "supported PMU type"
+       depends on PERF_EVENTS
+
+config RISCV_BASE_PMU
+       bool "Base Performance Monitoring Unit"
+       default y
+       help
+         A base PMU that serves as a reference implementation with limited
+         perf features.  It can run on any RISC-V machine, so it serves as
+         the fallback, but it can also be disabled to reduce kernel size.
+
+endmenu
+
 endmenu
 
 menu "Kernel type"
index 4286a5f838760c7ad4d922ddd2b49286c374df56..576ffdca06baf1d7fe51de655a63286550c93626 100644 (file)
@@ -25,6 +25,7 @@ generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += local.h
+generic-y += local64.h
 generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += module.h
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
new file mode 100644 (file)
index 0000000..0e638a0
--- /dev/null
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 SiFive
+ * Copyright (C) 2018 Andes Technology Corporation
+ *
+ */
+
+#ifndef _ASM_RISCV_PERF_EVENT_H
+#define _ASM_RISCV_PERF_EVENT_H
+
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+
+#define RISCV_BASE_COUNTERS    2
+
+/*
+ * The RISCV_MAX_COUNTERS parameter should be specified.
+ */
+
+#ifdef CONFIG_RISCV_BASE_PMU
+#define RISCV_MAX_COUNTERS     2
+#endif
+
+#ifndef RISCV_MAX_COUNTERS
+#error "Please provide a valid RISCV_MAX_COUNTERS for the PMU."
+#endif
+
+/*
+ * These are the indexes of bits in the counteren register *minus* 1,
+ * except for cycle.  It would be more coherent if they mapped directly
+ * to the counteren bit definitions, but there is a *time* register at
+ * counteren[1].  Per-cpu structures are a scarce resource here.
+ *
+ * According to the spec, an implementation can support counters up to
+ * mhpmcounter31, but many high-end processors have at most 6 general
+ * PMCs, so we only give definitions up to MHPMCOUNTER8 here.
+ */
+#define RISCV_PMU_CYCLE                0
+#define RISCV_PMU_INSTRET      1
+#define RISCV_PMU_MHPMCOUNTER3 2
+#define RISCV_PMU_MHPMCOUNTER4 3
+#define RISCV_PMU_MHPMCOUNTER5 4
+#define RISCV_PMU_MHPMCOUNTER6 5
+#define RISCV_PMU_MHPMCOUNTER7 6
+#define RISCV_PMU_MHPMCOUNTER8 7
+
+#define RISCV_OP_UNSUPP                (-EOPNOTSUPP)
+
+/*
+ * Bookkeeping for the perf events currently attached to the PMU.
+ */
+struct cpu_hw_events {
+       /* number of currently enabled events */
+       int                     n_events;
+       /* currently enabled events, one slot per counter index */
+       struct perf_event       *events[RISCV_MAX_COUNTERS];
+       /* vendor-defined PMU data */
+       void                    *platform;
+};
+
+/*
+ * Description of one RISC-V PMU implementation: the generic perf pmu it
+ * wraps, its event mapping tables and methods, its counter geometry and
+ * its (optional) interrupt.
+ */
+struct riscv_pmu {
+       /* the generic perf PMU backing this description */
+       struct pmu      *pmu;
+
+       /* generic hw/cache events table */
+       const int       *hw_events;
+       const int       (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+                                      [PERF_COUNT_HW_CACHE_OP_MAX]
+                                      [PERF_COUNT_HW_CACHE_RESULT_MAX];
+       /* method used to map hw/cache events */
+       int             (*map_hw_event)(u64 config);
+       int             (*map_cache_event)(u64 config);
+
+       /* max generic hw events in map */
+       int             max_events;
+       /* number total counters, 2(base) + x(general) */
+       int             num_counters;
+       /* the width of the counter, in bits */
+       int             counter_width;
+
+       /* vendor-defined PMU features */
+       void            *platform;
+
+       /* interrupt handler and IRQ number */
+       irqreturn_t     (*handle_irq)(int irq_num, void *dev);
+       int             irq;
+};
+
+#endif /* _ASM_RISCV_PERF_EVENT_H */
index 8586dd96c2f012ca42af358c0ab58cd2ce277322..e1274fc03af42de6b0a98fe0911e98d14db4dec8 100644 (file)
@@ -39,4 +39,6 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
 obj-$(CONFIG_FUNCTION_TRACER)  += mcount.o ftrace.o
 obj-$(CONFIG_DYNAMIC_FTRACE)   += mcount-dyn.o
 
+obj-$(CONFIG_PERF_EVENTS)      += perf_event.o
+
 clean:
diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
new file mode 100644 (file)
index 0000000..b0e10c4
--- /dev/null
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2009 Jaswinder Singh Rajput
+ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ * Copyright (C) 2009 Google, Inc., Stephane Eranian
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ * Copyright (C) 2018 Andes Technology Corporation
+ *
+ * Perf_events support for RISC-V platforms.
+ *
+ * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough
+ * functionality for perf event to fully work, this file provides
+ * the very basic framework only.
+ *
+ * For platform ports, please check Documentation/riscv/pmu.txt.
+ *
+ * The Copyright line includes x86 and tile ones.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/kernel.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+#include <linux/bitmap.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+#include <linux/atomic.h>
+#include <linux/of.h>
+#include <asm/perf_event.h>
+
+static const struct riscv_pmu *riscv_pmu __read_mostly;
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+/*
+ * Hardware & cache maps and their methods
+ */
+
+/*
+ * Generic hardware event table, indexed by PERF_COUNT_HW_* id.  Only
+ * cycles and retired instructions map to a counter; every other listed
+ * event is RISCV_OP_UNSUPP.
+ */
+static const int riscv_hw_event_map[] = {
+       [PERF_COUNT_HW_CPU_CYCLES]              = RISCV_PMU_CYCLE,
+       [PERF_COUNT_HW_INSTRUCTIONS]            = RISCV_PMU_INSTRET,
+       [PERF_COUNT_HW_CACHE_REFERENCES]        = RISCV_OP_UNSUPP,
+       [PERF_COUNT_HW_CACHE_MISSES]            = RISCV_OP_UNSUPP,
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = RISCV_OP_UNSUPP,
+       [PERF_COUNT_HW_BRANCH_MISSES]           = RISCV_OP_UNSUPP,
+       [PERF_COUNT_HW_BUS_CYCLES]              = RISCV_OP_UNSUPP,
+};
+
+/*
+ * Generic cache event table, indexed as [cache type][operation][result].
+ * C(x) is shorthand for PERF_COUNT_HW_CACHE_##x.  Every entry listed
+ * below is RISCV_OP_UNSUPP.
+ *
+ * NOTE(review): any slot that is not listed explicitly is
+ * zero-initialized, and 0 is RISCV_PMU_CYCLE rather than
+ * RISCV_OP_UNSUPP.  Confirm that every cache type is covered before
+ * riscv_map_cache_decode() starts returning success.
+ */
+#define C(x) PERF_COUNT_HW_CACHE_##x
+static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+[PERF_COUNT_HW_CACHE_OP_MAX]
+[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       [C(L1D)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+       },
+       [C(L1I)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+       },
+       [C(LL)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+       },
+       [C(DTLB)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] =  RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] =  RISCV_OP_UNSUPP,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+       },
+       [C(ITLB)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+       },
+       [C(BPU)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+                       [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+               },
+       },
+};
+
+/*
+ * Translate a generic hardware event id into a PMU event code.
+ *
+ * Returns the table entry for @config (which may itself be
+ * RISCV_OP_UNSUPP), or -EINVAL when @config lies outside the table.
+ */
+static int riscv_map_hw_event(u64 config)
+{
+       const int *map = riscv_pmu->hw_events;
+
+       if (config < riscv_pmu->max_events)
+               return map[config];
+
+       return -EINVAL;
+}
+
+/*
+ * Placeholder decoder for cache event configs: it does not look at
+ * @config, leaves @type, @op and @result untouched, and always returns
+ * -ENOENT.
+ */
+int riscv_map_cache_decode(u64 config, unsigned int *type,
+                          unsigned int *op, unsigned int *result)
+{
+       return -ENOENT;
+}
+
+/*
+ * Translate a generic cache event config into a PMU event code.
+ *
+ * Returns the (non-negative) event code on success, the decoder's error
+ * if @config cannot be decoded, -ENOENT if the PMU provides no cache
+ * event table, or -EINVAL if the decoded indexes are out of range or the
+ * event is marked RISCV_OP_UNSUPP.
+ */
+static int riscv_map_cache_event(u64 config)
+{
+       unsigned int type, op, result;
+       int code;
+       int err;
+
+       err = riscv_map_cache_decode(config, &type, &op, &result);
+       if (err)
+               return err;
+
+       /*
+        * Without a table there is nothing to look up.  Do not return err
+        * here: it is 0 at this point, which the caller would take for a
+        * valid event code (RISCV_PMU_CYCLE).
+        */
+       if (!riscv_pmu->cache_events)
+               return -ENOENT;
+
+       if (type >= PERF_COUNT_HW_CACHE_MAX ||
+           op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+           result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+               return -EINVAL;
+
+       code = (*riscv_pmu->cache_events)[type][op][result];
+       if (code == RISCV_OP_UNSUPP)
+               return -EINVAL;
+
+       return code;
+}
+
+/*
+ * Low-level functions: reading/writing counters
+ */
+
+/*
+ * Read the hardware counter selected by @idx.
+ *
+ * Only RISCV_PMU_CYCLE and RISCV_PMU_INSTRET are backed by a CSR read.
+ * Any other index yields -EINVAL converted to u64; an index outside
+ * 0 .. RISCV_MAX_COUNTERS - 1 additionally triggers a one-time warning.
+ */
+static inline u64 read_counter(int idx)
+{
+       switch (idx) {
+       case RISCV_PMU_CYCLE:
+               return csr_read(cycle);
+       case RISCV_PMU_INSTRET:
+               return csr_read(instret);
+       default:
+               /* RISCV_MAX_COUNTERS itself is already out of range. */
+               WARN_ON_ONCE(idx < 0 || idx >= RISCV_MAX_COUNTERS);
+               return -EINVAL;
+       }
+}
+
+/*
+ * Writing counters is not implemented: @idx and @value are ignored and
+ * any call triggers a one-time warning.
+ */
+static inline void write_counter(int idx, u64 value)
+{
+       /* currently not supported */
+       WARN_ON_ONCE(1);
+}
+
+/*
+ * pmu->read: fold the hardware counter's progress into event->count.
+ *
+ * Other architectures usually route this through an xxx_perf_event_update
+ * helper that hands the counter value back to an IRQ handler and is
+ * called for its side effect only from pmu->read.
+ */
+static void riscv_pmu_read(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 last, now;
+
+       /* Retry until prev_count is swapped without a concurrent update. */
+       for (;;) {
+               last = local64_read(&hwc->prev_count);
+               now = read_counter(hwc->idx);
+
+               if ((u64)local64_cmpxchg(&hwc->prev_count, last, now) == last)
+                       break;
+       }
+
+       /*
+        * Accumulate the progress since the previous snapshot, truncated to
+        * the counter width.  A PMU with an overflow interrupt would also
+        * need something like local64_sub(delta, &hwc->period_left) here.
+        */
+       local64_add((now - last) & ((1ULL << riscv_pmu->counter_width) - 1),
+                   &event->count);
+}
+
+/*
+ * State transition functions:
+ *
+ * stop()/start() & add()/del()
+ */
+
+/*
+ * pmu->stop: mark the event stopped and, when PERF_EF_UPDATE is
+ * requested and the count is stale, bring it up to date.
+ */
+static void riscv_pmu_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+       hwc->state |= PERF_HES_STOPPED;
+
+       /* Nothing more to do unless a fresh count was asked for. */
+       if (!(flags & PERF_EF_UPDATE))
+               return;
+       if (hwc->state & PERF_HES_UPTODATE)
+               return;
+
+       riscv_pmu->pmu->read(event);
+       hwc->state |= PERF_HES_UPTODATE;
+}
+
+/*
+ * pmu->start: (re)start counting for a stopped event.
+ */
+static void riscv_pmu_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+               return;
+
+       /*
+        * On reload the count must already be up to date.  A PMU with an
+        * overflow interrupt would program the counter with the period to
+        * the next interrupt at this point.
+        */
+       if (flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+       hwc->state = 0;
+       perf_event_update_userpage(event);
+
+       /*
+        * Counters cannot be written, so snapshot the current value as the
+        * baseline for the delta computed in pmu->read(); without it the
+        * first pmu->read() would report a wrong delta.
+        */
+       local64_set(&hwc->prev_count, read_counter(hwc->idx));
+}
+
+/*
+ * pmu->add: attach the event to this CPU's PMU.
+ *
+ * Returns 0 on success or -ENOSPC when all counters are in use.
+ */
+static int riscv_pmu_add(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct cpu_hw_events *cpu_events = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpu_events->n_events == riscv_pmu->num_counters)
+               return -ENOSPC;
+
+       /*
+        * There are no general counters, so no event-to-counter binding
+        * takes place here.
+        *
+        * Indexing by hwc->config does not work in general, because config
+        * may carry extra information; here, however, the event index is
+        * the only thing stored in hwc->config.
+        */
+       hwc->idx = hwc->config;
+       cpu_events->events[hwc->idx] = event;
+       cpu_events->n_events++;
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+       if (flags & PERF_EF_START)
+               riscv_pmu->pmu->start(event, PERF_EF_RELOAD);
+
+       return 0;
+}
+
+/*
+ * pmu->del: detach the event from this CPU's PMU, stopping it with a
+ * final count update.
+ */
+static void riscv_pmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpu_events = this_cpu_ptr(&cpu_hw_events);
+       int slot = event->hw.idx;
+
+       cpu_events->events[slot] = NULL;
+       cpu_events->n_events--;
+
+       riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
+       perf_event_update_userpage(event);
+}
+
+/*
+ * Interrupt: a skeleton for reference.
+ */
+
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Interrupt handler stub for the base PMU: it ignores @irq_num and @dev
+ * and always returns IRQ_NONE.
+ */
+irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
+{
+       return IRQ_NONE;
+}
+
+/*
+ * Request the PMU interrupt, if the current PMU has both an IRQ number
+ * and a handler.  Serialized by pmc_reserve_mutex.
+ *
+ * Returns 0 when there is nothing to request, otherwise the result of
+ * request_irq().
+ */
+static int reserve_pmc_hardware(void)
+{
+       int ret = 0;
+
+       mutex_lock(&pmc_reserve_mutex);
+       if (riscv_pmu->handle_irq && riscv_pmu->irq >= 0)
+               ret = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq,
+                                 IRQF_PERCPU, "riscv-base-perf", NULL);
+       mutex_unlock(&pmc_reserve_mutex);
+
+       return ret;
+}
+
+/*
+ * Release the PMU interrupt.  Serialized by pmc_reserve_mutex.
+ *
+ * The condition mirrors reserve_pmc_hardware(): the IRQ is only ever
+ * requested when both an IRQ number and a handler are present, so it is
+ * only freed in that case as well.
+ */
+void release_pmc_hardware(void)
+{
+       mutex_lock(&pmc_reserve_mutex);
+       if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq)
+               free_irq(riscv_pmu->irq, NULL);
+       mutex_unlock(&pmc_reserve_mutex);
+}
+
+/*
+ * Event Initialization/Finalization
+ */
+
+static atomic_t riscv_active_events = ATOMIC_INIT(0);
+
+/*
+ * event->destroy callback: drop one reference on the PMU hardware and
+ * release it once the last active event is gone.
+ */
+static void riscv_event_destroy(struct perf_event *event)
+{
+       if (atomic_dec_return(&riscv_active_events) != 0)
+               return;
+
+       release_pmc_hardware();
+}
+
+/*
+ * pmu->event_init: validate the event and prepare its hardware state.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for raw events, -ENOENT for event
+ * types this PMU does not handle, the (negative) mapping result for
+ * events the PMU cannot count, or -EBUSY when the PMU hardware cannot
+ * be reserved.
+ */
+static int riscv_event_init(struct perf_event *event)
+{
+       struct perf_event_attr *attr = &event->attr;
+       struct hw_perf_event *hwc = &event->hw;
+       int code;
+
+       /*
+        * Map the event before touching any shared state, so that the
+        * early exits below have nothing to undo.  Taking the reference
+        * first would leak it on the unsupported-type paths.
+        */
+       switch (attr->type) {
+       case PERF_TYPE_HARDWARE:
+               code = riscv_pmu->map_hw_event(attr->config);
+               break;
+       case PERF_TYPE_HW_CACHE:
+               code = riscv_pmu->map_cache_event(attr->config);
+               break;
+       case PERF_TYPE_RAW:
+               return -EOPNOTSUPP;
+       default:
+               return -ENOENT;
+       }
+
+       if (code < 0)
+               return code;
+
+       /* The first active event reserves the PMU hardware. */
+       if (atomic_inc_return(&riscv_active_events) == 1) {
+               if (reserve_pmc_hardware()) {
+                       pr_warn("PMC hardware not available\n");
+                       atomic_dec(&riscv_active_events);
+                       return -EBUSY;
+               }
+       }
+
+       /*
+        * Install the destructor only once the reference is really held,
+        * so a failed init never leaves a stale callback behind.
+        */
+       event->destroy = riscv_event_destroy;
+
+       /*
+        * idx is set to -1 because the index of a general event should not
+        * be decided until it is bound to some counter in pmu->add().
+        *
+        * Since there is no such support, pmu->add() later simply uses
+        * hwc->config as the index instead.
+        */
+       hwc->config = code;
+       hwc->idx = -1;
+
+       return 0;
+}
+
+/*
+ * Initialization
+ */
+
+/*
+ * The generic perf pmu backing riscv_base_pmu, wired to the callbacks
+ * defined in this file.
+ */
+static struct pmu min_pmu = {
+       .name           = "riscv-base",
+       .event_init     = riscv_event_init,
+       .add            = riscv_pmu_add,
+       .del            = riscv_pmu_del,
+       .start          = riscv_pmu_start,
+       .stop           = riscv_pmu_stop,
+       .read           = riscv_pmu_read,
+};
+
+/*
+ * The base PMU description: only the RISCV_BASE_COUNTERS base counters,
+ * the generic event tables defined above, and no interrupt.
+ */
+static const struct riscv_pmu riscv_base_pmu = {
+       .pmu = &min_pmu,
+       .max_events = ARRAY_SIZE(riscv_hw_event_map),
+       .map_hw_event = riscv_map_hw_event,
+       .hw_events = riscv_hw_event_map,
+       .map_cache_event = riscv_map_cache_event,
+       .cache_events = &riscv_cache_event_map,
+       /* used to build the delta mask in riscv_pmu_read() */
+       .counter_width = 63,
+       /* base counters plus zero general counters */
+       .num_counters = RISCV_BASE_COUNTERS + 0,
+       .handle_irq = &riscv_base_pmu_handle_irq,
+
+       /* This means this PMU has no IRQ. */
+       .irq = -1,
+};
+
+/*
+ * Device tree match table; .data points to the struct riscv_pmu to use
+ * for a matching node.
+ */
+static const struct of_device_id riscv_pmu_of_ids[] = {
+       {.compatible = "riscv,base-pmu",        .data = &riscv_base_pmu},
+       { /* sentinel value */ }
+};
+
+/*
+ * Select the PMU description and register it with the perf core.
+ *
+ * riscv_base_pmu is the default; a device tree node of type "pmu" that
+ * matches riscv_pmu_of_ids overrides it with the matched entry's data.
+ *
+ * Returns the result of perf_pmu_register().
+ */
+int __init init_hw_perf_events(void)
+{
+       struct device_node *node = of_find_node_by_type(NULL, "pmu");
+       const struct of_device_id *of_id;
+
+       riscv_pmu = &riscv_base_pmu;
+
+       if (node) {
+               of_id = of_match_node(riscv_pmu_of_ids, node);
+
+               if (of_id)
+                       riscv_pmu = of_id->data;
+
+               /* Drop the reference taken by of_find_node_by_type(). */
+               of_node_put(node);
+       }
+
+       return perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW);
+}
+arch_initcall(init_hw_perf_events);