administrators to configure the period of the hrtimer and the perf
event. The right value for a particular environment is a trade-off
between fast response to lockups and detection overhead.
+
+By default, the watchdog runs on all online cores. However, on a
+kernel configured with NO_HZ_FULL, by default the watchdog runs only
+on the housekeeping cores, not the cores specified in the "nohz_full"
+boot argument. If we allowed the watchdog to run by default on
+the "nohz_full" cores, we would have to run timer ticks to activate
+the scheduler, which would prevent the "nohz_full" functionality
+from protecting the user code on those cores from the kernel.
+Of course, disabling it by default on the nohz_full cores means that
+when those cores do enter the kernel, by default we will not be
+able to detect if they lock up. However, allowing the watchdog
+to continue to run on the housekeeping (non-tickless) cores means
+that we will continue to detect lockups properly on those cores.
+
+In either case, the set of cores excluded from running the watchdog
+may be adjusted via the kernel.watchdog_cpumask sysctl. For
+nohz_full cores, this may be useful for debugging a case where the
+kernel seems to be hanging on the nohz_full cores.
==============================================================
+watchdog_cpumask:
+
+This value can be used to control on which cpus the watchdog may run.
+The default cpumask is all possible cores, but if NO_HZ_FULL is
+enabled in the kernel config, and cores are specified with the
+nohz_full= boot argument, those cores are excluded by default.
+Offline cores can be included in this mask, and if the core is later
+brought online, the watchdog will be started based on the mask value.
+
+Typically this value would only be touched in the nohz_full case
+to re-enable cores that by default were not running the watchdog,
+if a kernel lockup was suspected on those cores.
+
+The argument value is the standard cpulist format for cpumasks,
+so for example to enable the watchdog on cores 0, 2, 3, and 4 you
+might say:
+
+ echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask
+
+==============================================================
+
watchdog_thresh:
This value can be used to control the frequency of hrtimer and NMI
extern int soft_watchdog_enabled;
extern int watchdog_user_enabled;
extern int watchdog_thresh;
+extern unsigned long *watchdog_cpumask_bits;
extern int sysctl_softlockup_all_cpu_backtrace;
struct ctl_table;
extern int proc_watchdog(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
extern int proc_watchdog_thresh(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
+extern int proc_watchdog_cpumask(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
#endif
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
*
* The cpumask field in the smp_hotplug_thread must not be updated directly
* by the client, but only by calling this function.
+ * This function can only be called on a registered smp_hotplug_thread.
*/
int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
const struct cpumask *new)
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "watchdog_cpumask",
+ .data = &watchdog_cpumask_bits,
+ .maxlen = NR_CPUS,
+ .mode = 0644,
+ .proc_handler = proc_watchdog_cpumask,
+ },
{
.procname = "softlockup_panic",
.data = &softlockup_panic,
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
+#include <linux/tick.h>
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#else
#define sysctl_softlockup_all_cpu_backtrace 0
#endif
+static struct cpumask watchdog_cpumask __read_mostly;
+unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
static int __read_mostly watchdog_running;
static u64 __read_mostly sample_period;
* do we care if a 0 races with a timestamp?
* all it means is the softlock check starts one cycle later
*/
- for_each_online_cpu(cpu)
+ for_each_watchdog_cpu(cpu)
per_cpu(watchdog_touch_ts, cpu) = 0;
}
goto unlock;
get_online_cpus();
- for_each_online_cpu(cpu)
+ for_each_watchdog_cpu(cpu)
watchdog_nmi_enable(cpu);
put_online_cpus();
goto unlock;
get_online_cpus();
- for_each_online_cpu(cpu)
+ for_each_watchdog_cpu(cpu)
watchdog_nmi_disable(cpu);
put_online_cpus();
int cpu;
get_online_cpus();
- for_each_online_cpu(cpu)
+ for_each_watchdog_cpu(cpu)
update_watchdog(cpu);
put_online_cpus();
}
err = smpboot_register_percpu_thread(&watchdog_threads);
if (err)
pr_err("Failed to create watchdog threads, disabled\n");
- else
+ else {
+ if (smpboot_update_cpumask_percpu_thread(
+ &watchdog_threads, &watchdog_cpumask))
+ pr_err("Failed to set cpumask for watchdog threads\n");
watchdog_running = 1;
+ }
} else {
/*
* Enable/disable the lockup detectors or
mutex_unlock(&watchdog_proc_mutex);
return err;
}
+
+/*
+ * The cpumask is the mask of possible cpus that the watchdog can run
+ * on, not the mask of cpus it is actually running on. This allows the
+ * user to specify a mask that will include cpus that have not yet
+ * been brought online, if desired.
+ */
+int proc_watchdog_cpumask(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int err;
+
+ mutex_lock(&watchdog_proc_mutex);
+ err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
+ if (!err && write) {
+ /* Remove impossible cpus to keep sysctl output cleaner. */
+ cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
+ cpu_possible_mask);
+
+ if (watchdog_running) {
+ /*
+ * Failure would be due to being unable to allocate
+ * a temporary cpumask, so we are likely not in a
+ * position to do much else to make things better.
+ */
+ if (smpboot_update_cpumask_percpu_thread(
+ &watchdog_threads, &watchdog_cpumask) != 0)
+ pr_err("cpumask update failed\n");
+ }
+ }
+ mutex_unlock(&watchdog_proc_mutex);
+ return err;
+}
+
#endif /* CONFIG_SYSCTL */
void __init lockup_detector_init(void)
{
set_sample_period();
+#ifdef CONFIG_NO_HZ_FULL
+ if (tick_nohz_full_enabled()) {
+ if (!cpumask_empty(tick_nohz_full_mask))
+ pr_info("Disabling watchdog on nohz_full cores by default\n");
+ cpumask_andnot(&watchdog_cpumask, cpu_possible_mask,
+ tick_nohz_full_mask);
+ } else
+ cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
+#else
+ cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
+#endif
+
if (watchdog_enabled)
watchdog_enable_all_cpus();
}