arm64: kernel: Add arch-specific SDEI entry code and CPU masking

author James Morse <james.morse@arm.com>

Mon, 8 Jan 2018 15:38:12 +0000 (15:38 +0000)

committer Catalin Marinas <catalin.marinas@arm.com>

Sat, 13 Jan 2018 10:45:17 +0000 (10:45 +0000)
author James Morse <james.morse@arm.com>
Mon, 8 Jan 2018 15:38:12 +0000 (15:38 +0000)
committer Catalin Marinas <catalin.marinas@arm.com>
Sat, 13 Jan 2018 10:45:17 +0000 (10:45 +0000)
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h

index 59f26b6e673d6ea0439f7271d6f86467948e878b..d58a31ab525a3f0b49dcad756e2fd08e1a517641 100644 (file)
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -3,6 +3,49 @@
  #ifndef __ASM_SDEI_H
  #define __ASM_SDEI_H
  
-/* Later patches add the arch specific bits */
+/* Values for sdei_exit_mode */
+#define SDEI_EXIT_HVC  0
+#define SDEI_EXIT_SMC  1
  
-#endif /* __ASM_SDEI_H */
+#define SDEI_STACK_SIZE                IRQ_STACK_SIZE
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#include <asm/virt.h>
+
+extern unsigned long sdei_exit_mode;
+
+/* Software Delegated Exception entry point from firmware */
+asmlinkage void __sdei_asm_handler(unsigned long event_num, unsigned long arg,
+                                  unsigned long pc, unsigned long pstate);
+
+/*
+ * The above entry point does the minimum to call C code. This function does
+ * anything else, before calling the driver.
+ */
+struct sdei_registered_event;
+asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
+                                       struct sdei_registered_event *arg);
+
+unsigned long sdei_arch_get_entry_point(int conduit);
+#define sdei_arch_get_entry_point(x)   sdei_arch_get_entry_point(x)
+
+bool _on_sdei_stack(unsigned long sp);
+/*
+ * Report whether @sp lies on one of this CPU's SDEI stacks. The answer is
+ * always false unless both CONFIG_VMAP_STACK and CONFIG_ARM_SDE_INTERFACE
+ * are enabled and the caller is running in NMI context.
+ */
+static inline bool on_sdei_stack(unsigned long sp)
+{
+       if (!IS_ENABLED(CONFIG_VMAP_STACK) ||
+           !IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+               return false;
+
+       return in_nmi() && _on_sdei_stack(sp);
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_SDEI_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h

index 6ad30776e984d071134f8762395565859691e0b0..472ef944e93260be2faad4182431331e21bc3569 100644 (file)
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -22,6 +22,7 @@
  
  #include <asm/memory.h>
  #include <asm/ptrace.h>
+#include <asm/sdei.h>
  
  struct stackframe {
         unsigned long fp;
@@ -85,6 +86,8 @@ static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp
                 return true;
         if (on_overflow_stack(sp))
                 return true;
+       if (on_sdei_stack(sp))
+               return true;
  
         return false;
  }
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile

index 0c760db04858e0234dabc63d24fa5a38abcce6e1..b87541360f43824e6df20961af87a3154f1f7c0f 100644 (file)
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -52,6 +52,7 @@ arm64-obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o    \
  arm64-obj-$(CONFIG_ARM64_RELOC_TEST)   += arm64-reloc-test.o
  arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
  arm64-obj-$(CONFIG_CRASH_DUMP)         += crash_dump.o
+arm64-obj-$(CONFIG_ARM_SDE_INTERFACE)  += sdei.o
  
  ifeq ($(CONFIG_KVM),y)
  arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR)    += bpi.o
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c

index af247d10252f5019e8b44361bb4a8a724300d7e1..1dcc493f5765df99ded63fc6a10a047c3ffaa6d8 100644 (file)
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -18,6 +18,7 @@
   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  
+#include <linux/arm_sdei.h>
  #include <linux/sched.h>
  #include <linux/mm.h>
  #include <linux/dma-mapping.h>
@@ -157,6 +158,10 @@ int main(void)
    BLANK();
  #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
    DEFINE(TRAMP_VALIAS,         TRAMP_VALIAS);
+#endif
+#ifdef CONFIG_ARM_SDE_INTERFACE
+  DEFINE(SDEI_EVENT_INTREGS,   offsetof(struct sdei_registered_event, interrupted_regs));
+  DEFINE(SDEI_EVENT_PRIORITY,  offsetof(struct sdei_registered_event, priority));
  #endif
    return 0;
  }
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S

index 07a7d4db8ec4acaa92e5fa7e58be472dcaf707b4..40bf5083d182343bbbc8408df3222059ea433b17 100644 (file)
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1153,3 +1153,104 @@ ENTRY(ret_from_fork)
         b       ret_to_user
  ENDPROC(ret_from_fork)
  NOKPROBE(ret_from_fork)
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+
+#include <asm/sdei.h>
+#include <uapi/linux/arm_sdei.h>
+
+/*
+ * Software Delegated Exception entry point.
+ *
+ * x0: Event number
+ * x1: struct sdei_registered_event argument from registration time.
+ * x2: interrupted PC
+ * x3: interrupted PSTATE
+ *
+ * Firmware has preserved x0->x17 for us, we must save/restore the rest to
+ * follow SMC-CC. We save (or retrieve) all the registers as the handler may
+ * want them.
+ *
+ * The registers are stored in the event's interrupted_regs (x1 +
+ * SDEI_EVENT_INTREGS). x19 holds the event pointer and x28 the interrupted
+ * sp_el0 across the call to __sdei_handler(); both are reloaded from the
+ * saved registers before firmware is called to complete the event.
+ * __sdei_handler()'s return value selects the completion call: a value of 0
+ * or 1 uses EVENT_COMPLETE, anything else is passed in x1 as the address for
+ * EVENT_COMPLETE_AND_RESUME.
+ */
+ENTRY(__sdei_asm_handler)
+       stp     x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC]        // interrupted PC and PSTATE
+       stp     x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2]
+       stp     x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3]
+       stp     x8, x9, [x1, #SDEI_EVENT_INTREGS + 16 * 4]
+       stp     x10, x11, [x1, #SDEI_EVENT_INTREGS + 16 * 5]
+       stp     x12, x13, [x1, #SDEI_EVENT_INTREGS + 16 * 6]
+       stp     x14, x15, [x1, #SDEI_EVENT_INTREGS + 16 * 7]
+       stp     x16, x17, [x1, #SDEI_EVENT_INTREGS + 16 * 8]
+       stp     x18, x19, [x1, #SDEI_EVENT_INTREGS + 16 * 9]
+       stp     x20, x21, [x1, #SDEI_EVENT_INTREGS + 16 * 10]
+       stp     x22, x23, [x1, #SDEI_EVENT_INTREGS + 16 * 11]
+       stp     x24, x25, [x1, #SDEI_EVENT_INTREGS + 16 * 12]
+       stp     x26, x27, [x1, #SDEI_EVENT_INTREGS + 16 * 13]
+       stp     x28, x29, [x1, #SDEI_EVENT_INTREGS + 16 * 14]
+       mov     x4, sp
+       stp     lr, x4, [x1, #SDEI_EVENT_INTREGS + S_LR]        // lr and the interrupted sp
+
+       mov     x19, x1                 // x19 = event, kept across the call
+
+#ifdef CONFIG_VMAP_STACK
+       /*
+        * entry.S may have been using sp as a scratch register, find whether
+        * this is a normal or critical event and switch to the appropriate
+        * stack for this CPU.
+        */
+       ldrb    w4, [x19, #SDEI_EVENT_PRIORITY]
+       cbnz    w4, 1f                  // non-zero priority: critical stack
+       ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
+       b       2f
+1:     ldr_this_cpu dst=x5, sym=sdei_stack_critical_ptr, tmp=x6
+2:     mov     x6, #SDEI_STACK_SIZE
+       add     x5, x5, x6
+       mov     sp, x5                  // sp = stack base + SDEI_STACK_SIZE
+#endif
+
+       /*
+        * We may have interrupted userspace, or a guest, or exit-from or
+        * return-to either of these. We can't trust sp_el0, restore it.
+        */
+       mrs     x28, sp_el0             // stash sp_el0, written back after the call
+       ldr_this_cpu    dst=x0, sym=__entry_task, tmp=x1
+       msr     sp_el0, x0
+
+       /* If we interrupted the kernel point to the previous stack/frame. */
+       and     x0, x3, #0xc
+       mrs     x1, CurrentEL
+       cmp     x0, x1
+       csel    x29, x29, xzr, eq       // fp, or zero
+       csel    x4, x2, xzr, eq         // elr, or zero
+
+       stp     x29, x4, [sp, #-16]!    // push the {fp, elr} pair chosen above
+       mov     x29, sp
+
+       add     x0, x19, #SDEI_EVENT_INTREGS    // x0 = &event->interrupted_regs
+       mov     x1, x19                 // x1 = event
+       bl      __sdei_handler
+
+       msr     sp_el0, x28
+       /* restore regs >x17 that we clobbered */
+       ldp     x28, x29, [x19, #SDEI_EVENT_INTREGS + 16 * 14]
+       ldp     lr, x4, [x19, #SDEI_EVENT_INTREGS + S_LR]
+       mov     sp, x4
+       ldp     x18, x19, [x19, #SDEI_EVENT_INTREGS + 16 * 9]   // x19 is the base: load it last
+
+       mov     x1, x0                  // address to complete_and_resume
+       /* x0 = (x0 <= 1) ? EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME */
+       cmp     x0, #1
+       mov_q   x2, SDEI_1_0_FN_SDEI_EVENT_COMPLETE
+       mov_q   x3, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
+       csel    x0, x2, x3, ls
+
+       /* On success, this call never returns... */
+       ldr_l   x2, sdei_exit_mode
+       cmp     x2, #SDEI_EXIT_SMC
+       b.ne    1f
+       smc     #0
+       b       .
+1:     hvc     #0
+       b       .
+ENDPROC(__sdei_asm_handler)
+#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c

new file mode 100644 (file)

index 0000000..f9dffac
--- /dev/null
+++ b/arch/arm64/kernel/sdei.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2017 Arm Ltd.
+#define pr_fmt(fmt) "sdei: " fmt
+
+#include <linux/arm_sdei.h>
+#include <linux/hardirq.h>
+#include <linux/irqflags.h>
+#include <linux/sched/task_stack.h>
+#include <linux/uaccess.h>
+
+#include <asm/alternative.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+#include <asm/vmap_stack.h>
+
+unsigned long sdei_exit_mode;
+
+/*
+ * VMAP'd stacks check for stack overflow on exception using sp as a scratch
+ * register, meaning SDEI has to switch to its own stack. We need two stacks as
+ * a critical event may interrupt a normal event that has just taken a
+ * synchronous exception, and is using sp as scratch register. For a critical
+ * event interrupting a normal event, we can't reliably tell if we were on the
+ * sdei stack.
+ * For now, we allocate stacks when the driver is probed.
+ */
+DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
+DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
+
+#ifdef CONFIG_VMAP_STACK
+DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
+DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
+#endif
+
+/*
+ * Free the stack recorded in the per-cpu variable @ptr for @cpu, if there is
+ * one. The per-cpu slot is cleared before the memory is handed to vfree().
+ */
+static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
+{
+       unsigned long *stack = per_cpu(*ptr, cpu);
+
+       if (!stack)
+               return;
+
+       per_cpu(*ptr, cpu) = NULL;
+       vfree(stack);
+}
+
+/* Release the normal and critical SDEI stacks of every possible CPU. */
+static void free_sdei_stacks(void)
+{
+       int c;
+
+       for_each_possible_cpu(c) {
+               _free_sdei_stack(&sdei_stack_normal_ptr, c);
+               _free_sdei_stack(&sdei_stack_critical_ptr, c);
+       }
+}
+
+/*
+ * Allocate one SDEI_STACK_SIZE stack on @cpu's node and record it in the
+ * per-cpu variable @ptr. Returns 0 on success or -ENOMEM if the allocation
+ * failed, in which case the per-cpu slot is left untouched.
+ */
+static int _init_sdei_stack(unsigned long * __percpu *ptr, int cpu)
+{
+       unsigned long *stack = arch_alloc_vmap_stack(SDEI_STACK_SIZE,
+                                                    cpu_to_node(cpu));
+
+       if (stack == NULL)
+               return -ENOMEM;
+
+       per_cpu(*ptr, cpu) = stack;
+       return 0;
+}
+
+/*
+ * Allocate a normal and a critical SDEI stack for every possible CPU.
+ * On the first failure everything allocated so far is freed again and the
+ * error is returned; otherwise the result is 0.
+ */
+static int init_sdei_stacks(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               int err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
+
+               if (!err)
+                       err = _init_sdei_stack(&sdei_stack_critical_ptr, cpu);
+
+               if (err) {
+                       free_sdei_stacks();
+                       return err;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Check @sp against this CPU's critical stack, then its normal stack. Each
+ * stack covers [base, base + SDEI_STACK_SIZE). Always false when
+ * CONFIG_VMAP_STACK is disabled.
+ */
+bool _on_sdei_stack(unsigned long sp)
+{
+       unsigned long base;
+
+       if (!IS_ENABLED(CONFIG_VMAP_STACK))
+               return false;
+
+       base = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
+       if (sp >= base && sp < base + SDEI_STACK_SIZE)
+               return true;
+
+       base = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
+       return sp >= base && sp < base + SDEI_STACK_SIZE;
+}
+
+/*
+ * Prepare the architecture side of SDEI and return the address firmware
+ * should use as the event entry point, or 0 if SDEI can't be used. @conduit
+ * is recorded in sdei_exit_mode as the way to call back into firmware.
+ */
+unsigned long sdei_arch_get_entry_point(int conduit)
+{
+       /*
+        * Events are delivered between adjacent exception levels. A kernel
+        * that booted at EL1 assumes a hypervisor is marshalling events for
+        * it. A kernel that booted at EL2 but dropped to EL1 because it
+        * doesn't support VHE can't support SDEI.
+        */
+       if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
+               pr_err("Not supported on this hardware/boot configuration\n");
+               return 0;
+       }
+
+       /* With vmap'd stacks the entry code switches to dedicated stacks */
+       if (IS_ENABLED(CONFIG_VMAP_STACK) && init_sdei_stacks())
+               return 0;
+
+       if (conduit == CONDUIT_HVC)
+               sdei_exit_mode = SDEI_EXIT_HVC;
+       else
+               sdei_exit_mode = SDEI_EXIT_SMC;
+
+       return (unsigned long)__sdei_asm_handler;
+}
+
+/*
+ * __sdei_handler() returns one of:
+ *  SDEI_EV_HANDLED -  success, return to the interrupted context.
+ *  SDEI_EV_FAILED  -  failure, return this error code to firmware.
+ *  virtual-address -  success, return to this address.
+ *
+ * _sdei_handler() computes that value. It fills in regs->regs[0..3] via
+ * sdei_api_event_context(), sets PAN and runs sdei_event_handler(). A
+ * non-zero result from the driver yields SDEI_EV_FAILED. Otherwise the
+ * interrupted mode in regs->pstate decides between returning to the
+ * interrupted context and returning through a vector relative to vbar_el1.
+ */
+static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
+                                            struct sdei_registered_event *arg)
+{
+       u32 mode;
+       int i, err = 0;
+       const int clobbered_registers = 4;      /* regs[0..3], fetched below */
+       u64 elr = read_sysreg(elr_el1);         /* compared again after the handler ran */
+       u32 kernel_mode = read_sysreg(CurrentEL) | 1;   /* +SPSel */
+       unsigned long vbar = read_sysreg(vbar_el1);
+
+       /* Retrieve the missing registers values */
+       for (i = 0; i < clobbered_registers; i++) {
+               /* from within the handler, this call always succeeds */
+               sdei_api_event_context(i, &regs->regs[i]);
+       }
+
+       /*
+        * We didn't take an exception to get here, set PAN. UAO will be cleared
+        * by sdei_event_handler()s set_fs(USER_DS) call.
+        */
+       __uaccess_enable_hw_pan();
+
+       err = sdei_event_handler(regs, arg);
+       if (err)
+               return SDEI_EV_FAILED;
+
+       if (elr != read_sysreg(elr_el1)) {
+               /*
+                * We took a synchronous exception from the SDEI handler.
+                * This could deadlock, and if you interrupt KVM it will
+                * hyp-panic instead.
+                */
+               pr_warn("unsafe: exception during handler\n");
+       }
+
+       mode = regs->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK);
+
+       /*
+        * If we interrupted the kernel with interrupts masked, we always go
+        * back to wherever we came from.
+        */
+       if (mode == kernel_mode && !interrupts_enabled(regs))
+               return SDEI_EV_HANDLED;
+
+       /*
+        * Otherwise, we pretend this was an IRQ. This lets user space tasks
+        * receive signals before we return to them, and KVM to invoke its
+        * world switch to do the same.
+        *
+        * See DDI0487B.a Table D1-7 'Vector offsets from vector table base
+        * address'.
+        */
+       if (mode == kernel_mode)
+               return vbar + 0x280;    /* interrupted mode matches kernel_mode */
+       else if (mode & PSR_MODE32_BIT)
+               return vbar + 0x680;    /* PSR_MODE32_BIT set in the interrupted pstate */
+
+       return vbar + 0x480;            /* every other interrupted mode */
+}
+
+
+/*
+ * C entry point called from __sdei_asm_handler. Wraps _sdei_handler() in
+ * nmi_enter()/nmi_exit() unless this CPU is already in NMI context, and
+ * passes _sdei_handler()'s result back to the caller.
+ */
+asmlinkage __kprobes notrace unsigned long
+__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
+{
+       unsigned long result;
+       /*
+        * nmi_enter() takes care of printk() re-entrance and of using RCU
+        * when RCU thought this CPU was idle. A critical event can interrupt
+        * a normal event, so we may already be in_nmi() on entry; only the
+        * outermost handler enters and leaves NMI context.
+        */
+       const bool nested = in_nmi();
+
+       if (!nested)
+               nmi_enter();
+
+       result = _sdei_handler(regs, arg);
+
+       if (!nested)
+               nmi_exit();
+
+       return result;
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c

index 551eb07c53b6c76c5c76cf002152dc772148a8c2..3b8ad7be9c3344e419af3513c31eebbcd3ac0602 100644 (file)
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -18,6 +18,7 @@
   */
  
  #include <linux/acpi.h>
+#include <linux/arm_sdei.h>
  #include <linux/delay.h>
  #include <linux/init.h>
  #include <linux/spinlock.h>
@@ -836,6 +837,7 @@ static void ipi_cpu_stop(unsigned int cpu)
         set_cpu_online(cpu, false);
  
         local_daif_mask();
+       sdei_mask_local_cpu();
  
         while (1)
                 cpu_relax();
@@ -853,6 +855,7 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
         atomic_dec(&waiting_for_crash_ipi);
  
         local_irq_disable();
+       sdei_mask_local_cpu();
  
  #ifdef CONFIG_HOTPLUG_CPU
         if (cpu_ops[cpu]->cpu_die)
@@ -972,6 +975,8 @@ void smp_send_stop(void)
         if (num_online_cpus() > 1)
                 pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
                            cpumask_pr_args(cpu_online_mask));
+
+       sdei_mask_local_cpu();
  }
  
  #ifdef CONFIG_KEXEC_CORE
@@ -990,8 +995,10 @@ void crash_smp_send_stop(void)
  
         cpus_stopped = 1;
  
-       if (num_online_cpus() == 1)
+       if (num_online_cpus() == 1) {
+               sdei_mask_local_cpu();
                 return;
+       }
  
         cpumask_copy(&mask, cpu_online_mask);
         cpumask_clear_cpu(smp_processor_id(), &mask);
@@ -1009,6 +1016,8 @@ void crash_smp_send_stop(void)
         if (atomic_read(&waiting_for_crash_ipi) > 0)
                 pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
                            cpumask_pr_args(&mask));
+
+       sdei_mask_local_cpu();
  }
  
  bool smp_crash_stop_failed(void)
author	James Morse <james.morse@arm.com>
	Mon, 8 Jan 2018 15:38:12 +0000 (15:38 +0000)
committer	Catalin Marinas <catalin.marinas@arm.com>
	Sat, 13 Jan 2018 10:45:17 +0000 (10:45 +0000)
arch/arm64/include/asm/sdei.h		patch \| blob \| history
arch/arm64/include/asm/stacktrace.h		patch \| blob \| history
arch/arm64/kernel/Makefile		patch \| blob \| history
arch/arm64/kernel/asm-offsets.c		patch \| blob \| history
arch/arm64/kernel/entry.S		patch \| blob \| history
arch/arm64/kernel/sdei.c	[new file with mode: 0644]	patch \| blob
arch/arm64/kernel/smp.c		patch \| blob \| history