From 60ffdb36547da2397d6cfefe9c752ebad16524f6 Mon Sep 17 00:00:00 2001 From: Graf Yang Date: Wed, 20 Jan 2010 10:56:24 +0000 Subject: [PATCH] Blackfin: implement nmi_watchdog for SMP on BF561 Signed-off-by: Graf Yang Signed-off-by: Mike Frysinger --- arch/blackfin/Kconfig.debug | 9 + arch/blackfin/include/asm/irq.h | 4 + arch/blackfin/include/asm/nmi.h | 12 + arch/blackfin/include/asm/smp.h | 1 + arch/blackfin/kernel/Makefile | 1 + arch/blackfin/kernel/nmi.c | 313 ++++++++++++++++++++++++++ arch/blackfin/kernel/time-ts.c | 4 + arch/blackfin/mach-common/interrupt.S | 18 +- 8 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 arch/blackfin/include/asm/nmi.h create mode 100644 arch/blackfin/kernel/nmi.c diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug index 87f195ee2e06..1460d7b5edc1 100644 --- a/arch/blackfin/Kconfig.debug +++ b/arch/blackfin/Kconfig.debug @@ -238,6 +238,15 @@ config EARLY_PRINTK all of this lives in the init section and is thrown away after the kernel boots completely. +config NMI_WATCHDOG + bool "Enable NMI watchdog to help debugging lockup on SMP" + default n + depends on (SMP && !BFIN_SCRATCH_REG_RETN) + help + If any CPU in the system does not execute the period local timer + interrupt for more than 5 seconds, then the NMI handler dumps debug + information. This information can be used to debug the lockup. + config CPLB_INFO bool "Display the CPLB information" help diff --git a/arch/blackfin/include/asm/irq.h b/arch/blackfin/include/asm/irq.h index 89de539ed010..12f4060a31b0 100644 --- a/arch/blackfin/include/asm/irq.h +++ b/arch/blackfin/include/asm/irq.h @@ -38,4 +38,8 @@ #include +#ifdef CONFIG_NMI_WATCHDOG +# define ARCH_HAS_NMI_WATCHDOG +#endif + #endif /* _BFIN_IRQ_H_ */ diff --git a/arch/blackfin/include/asm/nmi.h b/arch/blackfin/include/asm/nmi.h new file mode 100644 index 000000000000..b9caac4fcfd8 --- /dev/null +++ b/arch/blackfin/include/asm/nmi.h @@ -0,0 +1,12 @@ +/* + * Copyright 2010 Analog Devices Inc. + * + * Licensed under the GPL-2 + */ + +#ifndef _BFIN_NMI_H_ +#define _BFIN_NMI_H_ + +#include + +#endif diff --git a/arch/blackfin/include/asm/smp.h b/arch/blackfin/include/asm/smp.h index 29fb88219470..7f26de09ca9c 100644 --- a/arch/blackfin/include/asm/smp.h +++ b/arch/blackfin/include/asm/smp.h @@ -22,6 +22,7 @@ extern char coreb_trampoline_start, coreb_trampoline_end; struct corelock_slot { int lock; }; +extern struct corelock_slot corelock; void smp_icache_flush_range_others(unsigned long start, unsigned long end); diff --git a/arch/blackfin/kernel/Makefile b/arch/blackfin/kernel/Makefile index a8ddbc8ed5af..346a421f1562 100644 --- a/arch/blackfin/kernel/Makefile +++ b/arch/blackfin/kernel/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_CPLB_INFO) += cplbinfo.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KGDB_TESTS) += kgdb_test.o +obj-$(CONFIG_NMI_WATCHDOG) += nmi.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_EARLY_PRINTK) += shadow_console.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c new file mode 100644 index 000000000000..19093c17632b --- /dev/null +++ b/arch/blackfin/kernel/nmi.c @@ -0,0 +1,313 @@ +/* + * Blackfin nmi_watchdog Driver + * + * Originally based on bfin_wdt.c + * Copyright 2010-2010 Analog Devices Inc. + * Graff Yang + * + * Enter bugs at http://blackfin.uclinux.org/ + * + * Licensed under the GPL-2 or later. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Bit in WDOG_CTL that indicates watchdog has expired (WDR0) */ +#define WDOG_EXPIRED 0x8000 + +/* Masks for WDEV field in WDOG_CTL register */ +#define ICTL_RESET 0x0 +#define ICTL_NMI 0x2 +#define ICTL_GPI 0x4 +#define ICTL_NONE 0x6 +#define ICTL_MASK 0x6 + +/* Masks for WDEN field in WDOG_CTL register */ +#define WDEN_MASK 0x0FF0 +#define WDEN_ENABLE 0x0000 +#define WDEN_DISABLE 0x0AD0 + +#define DRV_NAME "nmi-wdt" + +#define NMI_WDT_TIMEOUT 5 /* 5 seconds */ +#define NMI_CHECK_TIMEOUT (4 * HZ) /* 4 seconds in jiffies */ +static int nmi_wdt_cpu = 1; + +static unsigned int timeout = NMI_WDT_TIMEOUT; +static int nmi_active; + +static unsigned short wdoga_ctl; +static unsigned int wdoga_cnt; +static struct corelock_slot saved_corelock; +static atomic_t nmi_touched[NR_CPUS]; +static struct timer_list ntimer; + +enum { + COREA_ENTER_NMI = 0, + COREA_EXIT_NMI, + COREB_EXIT_NMI, + + NMI_EVENT_NR, +}; +static unsigned long nmi_event __attribute__ ((__section__(".l2.bss"))); + +/* we are in nmi, non-atomic bit ops is safe */ +static inline void set_nmi_event(int event) +{ + __set_bit(event, &nmi_event); +} + +static inline void wait_nmi_event(int event) +{ + while (!test_bit(event, &nmi_event)) + barrier(); + __clear_bit(event, &nmi_event); +} + +static inline void send_corea_nmi(void) +{ + wdoga_ctl = bfin_read_WDOGA_CTL(); + wdoga_cnt = bfin_read_WDOGA_CNT(); + + bfin_write_WDOGA_CTL(WDEN_DISABLE); + bfin_write_WDOGA_CNT(0); + bfin_write_WDOGA_CTL(WDEN_ENABLE | ICTL_NMI); +} + +static inline void restore_corea_nmi(void) +{ + bfin_write_WDOGA_CTL(WDEN_DISABLE); + bfin_write_WDOGA_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE); + + bfin_write_WDOGA_CNT(wdoga_cnt); + bfin_write_WDOGA_CTL(wdoga_ctl); +} + +static inline void save_corelock(void) +{ + saved_corelock = corelock; + corelock.lock = 0; +} + +static inline void restore_corelock(void) +{ + corelock = saved_corelock; +} + + +static inline void nmi_wdt_keepalive(void) +{ + bfin_write_WDOGB_STAT(0); +} + +static inline void nmi_wdt_stop(void) +{ + bfin_write_WDOGB_CTL(WDEN_DISABLE); +} + +/* before calling this function, you must stop the WDT */ +static inline void nmi_wdt_clear(void) +{ + /* clear TRO bit, disable event generation */ + bfin_write_WDOGB_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE); +} + +static inline void nmi_wdt_start(void) +{ + bfin_write_WDOGB_CTL(WDEN_ENABLE | ICTL_NMI); +} + +static inline int nmi_wdt_running(void) +{ + return ((bfin_read_WDOGB_CTL() & WDEN_MASK) != WDEN_DISABLE); +} + +static inline int nmi_wdt_set_timeout(unsigned long t) +{ + u32 cnt, max_t, sclk; + int run; + + sclk = get_sclk(); + max_t = -1 / sclk; + cnt = t * sclk; + if (t > max_t) { + pr_warning("NMI: timeout value is too large\n"); + return -EINVAL; + } + + run = nmi_wdt_running(); + nmi_wdt_stop(); + bfin_write_WDOGB_CNT(cnt); + if (run) + nmi_wdt_start(); + + timeout = t; + + return 0; +} + +int check_nmi_wdt_touched(void) +{ + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; + + cpumask_t mask = cpu_online_map; + + if (!atomic_read(&nmi_touched[this_cpu])) + return 0; + + atomic_set(&nmi_touched[this_cpu], 0); + + cpu_clear(this_cpu, mask); + for_each_cpu_mask(cpu, mask) { + invalidate_dcache_range((unsigned long)(&nmi_touched[cpu]), + (unsigned long)(&nmi_touched[cpu])); + if (!atomic_read(&nmi_touched[cpu])) + return 0; + atomic_set(&nmi_touched[cpu], 0); + } + + return 1; +} + +static void nmi_wdt_timer(unsigned long data) +{ + if (check_nmi_wdt_touched()) + nmi_wdt_keepalive(); + + mod_timer(&ntimer, jiffies + NMI_CHECK_TIMEOUT); +} + +static int __init init_nmi_wdt(void) +{ + nmi_wdt_set_timeout(timeout); + nmi_wdt_start(); + nmi_active = true; + + init_timer(&ntimer); + ntimer.function = nmi_wdt_timer; + ntimer.expires = jiffies + NMI_CHECK_TIMEOUT; + add_timer(&ntimer); + + pr_info("nmi_wdt: initialized: timeout=%d sec\n", timeout); + return 0; +} +device_initcall(init_nmi_wdt); + +void touch_nmi_watchdog(void) +{ + atomic_set(&nmi_touched[smp_processor_id()], 1); +} + +/* Suspend/resume support */ +#ifdef CONFIG_PM +static int nmi_wdt_suspend(struct sys_device *dev, pm_message_t state) +{ + nmi_wdt_stop(); + return 0; +} + +static int nmi_wdt_resume(struct sys_device *dev) +{ + if (nmi_active) + nmi_wdt_start(); + return 0; +} + +static struct sysdev_class nmi_sysclass = { + .name = DRV_NAME, + .resume = nmi_wdt_resume, + .suspend = nmi_wdt_suspend, +}; + +static struct sys_device device_nmi_wdt = { + .id = 0, + .cls = &nmi_sysclass, +}; + +static int __init init_nmi_wdt_sysfs(void) +{ + int error; + + if (!nmi_active) + return 0; + + error = sysdev_class_register(&nmi_sysclass); + if (!error) + error = sysdev_register(&device_nmi_wdt); + return error; +} +late_initcall(init_nmi_wdt_sysfs); + +#endif /* CONFIG_PM */ + + +asmlinkage notrace void do_nmi(struct pt_regs *fp) +{ + unsigned int cpu = smp_processor_id(); + nmi_enter(); + + cpu_pda[cpu].__nmi_count += 1; + + if (cpu == nmi_wdt_cpu) { + /* CoreB goes here first */ + + /* reload the WDOG_STAT */ + nmi_wdt_keepalive(); + + /* clear nmi interrupt for CoreB */ + nmi_wdt_stop(); + nmi_wdt_clear(); + + /* trigger NMI interrupt of CoreA */ + send_corea_nmi(); + + /* waiting CoreB to enter NMI */ + wait_nmi_event(COREA_ENTER_NMI); + + /* recover WDOGA's settings */ + restore_corea_nmi(); + + save_corelock(); + + /* corelock is save/cleared, CoreA is dummping messages */ + + wait_nmi_event(COREA_EXIT_NMI); + } else { + /* OK, CoreA entered NMI */ + set_nmi_event(COREA_ENTER_NMI); + } + + pr_emerg("\nNMI Watchdog detected LOCKUP, dump for CPU %d\n", cpu); + dump_bfin_process(fp); + dump_bfin_mem(fp); + show_regs(fp); + dump_bfin_trace_buffer(); + show_stack(current, (unsigned long *)fp); + + if (cpu == nmi_wdt_cpu) { + pr_emerg("This fault is not recoverable, sorry!\n"); + + /* CoreA dump finished, restore the corelock */ + restore_corelock(); + + set_nmi_event(COREB_EXIT_NMI); + } else { + /* CoreB dump finished, notice the CoreA we are done */ + set_nmi_event(COREA_EXIT_NMI); + + /* synchronize with CoreA */ + wait_nmi_event(COREB_EXIT_NMI); + } + + nmi_exit(); +} diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c index a351f97c87a3..41a907596c70 100644 --- a/arch/blackfin/kernel/time-ts.c +++ b/arch/blackfin/kernel/time-ts.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Accelerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) @@ -309,6 +310,9 @@ irqreturn_t bfin_coretmr_interrupt(int irq, void *dev_id) smp_mb(); evt->event_handler(evt); + + touch_nmi_watchdog(); + return IRQ_HANDLED; } diff --git a/arch/blackfin/mach-common/interrupt.S b/arch/blackfin/mach-common/interrupt.S index 0a0c088ead8c..cee62cf4acd4 100644 --- a/arch/blackfin/mach-common/interrupt.S +++ b/arch/blackfin/mach-common/interrupt.S @@ -194,12 +194,28 @@ ENTRY(_evt_ivhw) ENDPROC(_evt_ivhw) /* Interrupt routine for evt2 (NMI). - * We don't actually use this, so just return. * For inner circle type details, please see: * http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:nmi */ ENTRY(_evt_nmi) +#ifndef CONFIG_NMI_WATCHDOG .weak _evt_nmi +#else + /* Not take account of CPLBs, this handler will not return */ + SAVE_ALL_SYS + r0 = sp; + r1 = retn; + [sp + PT_PC] = r1; + trace_buffer_save(p4,r5); + + ANOMALY_283_315_WORKAROUND(p4, r5) + + SP += -12; + call _do_nmi; + SP += 12; +1: + jump 1b; +#endif rtn; ENDPROC(_evt_nmi) -- 2.30.2