genirq/cpuhotplug: Handle managed IRQs on CPU hotplug
author	Thomas Gleixner <tglx@linutronix.de>
	Mon, 19 Jun 2017 23:37:51 +0000 (01:37 +0200)
committer	Thomas Gleixner <tglx@linutronix.de>
	Thu, 22 Jun 2017 16:21:25 +0000 (18:21 +0200)
If a CPU goes offline, interrupts affine to the CPU are moved away. If the
outgoing CPU is the last CPU in the affinity mask, the migration code breaks
the affinity and sets it to all online CPUs.

This is a problem for affinity managed interrupts as CPU hotplug is often
used for power management purposes. If the affinity is broken, the
interrupt is no longer affine to the CPUs to which it was allocated.

Affinity spreading allows multiqueue devices to be laid out so that each
queue is assigned to a single CPU or a group of CPUs. If the last CPU of a
queue's group goes offline, the queue is no longer used, so the interrupt
can be shut down gracefully and parked until one of the assigned CPUs comes
online again.
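For context (not part of this patch), this is roughly how a multiqueue
driver ends up with managed interrupts: allocating its vectors with
PCI_IRQ_AFFINITY makes the PCI/MSI core spread them across the online CPUs
and mark them as affinity managed. A minimal sketch; the driver function
names and the ISR are made up for illustration:

	#include <linux/interrupt.h>
	#include <linux/pci.h>

	static irqreturn_t example_queue_isr(int irq, void *data)
	{
		/* Per-queue handler; details are irrelevant here. */
		return IRQ_HANDLED;
	}

	static int example_setup_queue_irqs(struct pci_dev *pdev, int nr_queues)
	{
		int i, nvec, ret;

		/*
		 * PCI_IRQ_AFFINITY asks the core to spread the vectors over
		 * the online CPUs and flag them as affinity managed.
		 */
		nvec = pci_alloc_irq_vectors(pdev, 1, nr_queues,
					     PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
		if (nvec < 0)
			return nvec;

		/* Error unwinding omitted for brevity. */
		for (i = 0; i < nvec; i++) {
			ret = request_irq(pci_irq_vector(pdev, i),
					  example_queue_isr, 0,
					  "example-queue", pdev);
			if (ret)
				return ret;
		}
		return 0;
	}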

Add a graceful shutdown mechanism to the irq affinity-breaking code path:
mark the irq as MANAGED_SHUTDOWN and leave the affinity mask unmodified.
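The MANAGED_SHUTDOWN state itself comes from a sibling change in this
series. The helpers used in the diff below follow the pattern of the other
irqd_* state accessors in include/linux/irq.h; a sketch (the exact bit
value is illustrative):

	IRQD_MANAGED_SHUTDOWN	= (1 << 23),

	/*
	 * Interrupt was shut down because the last CPU of its managed
	 * affinity mask went offline.
	 */
	static inline bool irqd_is_managed_and_shutdown(struct irq_data *d)
	{
		return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN;
	}

	static inline void irqd_set_managed_shutdown(struct irq_data *d)
	{
		__irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN;
	}

	static inline void irqd_clr_managed_shutdown(struct irq_data *d)
	{
		__irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN;
	}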

In the online path, scan the active interrupts for managed interrupts. If
a managed interrupt is functional and the newly online CPU is part of its
affinity mask, restart it if it is marked MANAGED_SHUTDOWN; if it is
already started up, try to add the CPU back to the effective affinity
mask.

Originally-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170619235447.273417334@linutronix.de
include/linux/cpuhotplug.h
include/linux/irq.h
kernel/cpu.c
kernel/irq/cpuhotplug.c

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0f2a80377520ec1d12299c78775d7162c63ac8c2..c15f22c5453550758184903299f199921f084d2b 100644
@@ -124,6 +124,7 @@ enum cpuhp_state {
        CPUHP_AP_ONLINE_IDLE,
        CPUHP_AP_SMPBOOT_THREADS,
        CPUHP_AP_X86_VDSO_VMA_ONLINE,
+       CPUHP_AP_IRQ_AFFINITY_ONLINE,
        CPUHP_AP_PERF_ONLINE,
        CPUHP_AP_PERF_X86_ONLINE,
        CPUHP_AP_PERF_X86_UNCORE_ONLINE,
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 807042b46af14b5d3e744838f796d75a1951f9dc..19cea63265993ceac3ad6ea658af1438bec79157 100644
@@ -500,7 +500,12 @@ extern int irq_set_affinity_locked(struct irq_data *data,
                                   const struct cpumask *cpumask, bool force);
 extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info);
 
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION)
 extern void irq_migrate_all_off_this_cpu(void);
+extern int irq_affinity_online_cpu(unsigned int cpu);
+#else
+# define irq_affinity_online_cpu       NULL
+#endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void irq_move_irq(struct irq_data *data);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cb5103413bd8df5056b3eda682a8d93b08656dd8..b86b32ebb3b2eeeb15a4cec3bbb65912d67809ec 100644
@@ -1252,6 +1252,11 @@ static struct cpuhp_step cpuhp_ap_states[] = {
                .startup.single         = smpboot_unpark_threads,
                .teardown.single        = NULL,
        },
+       [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
+               .name                   = "irq/affinity:online",
+               .startup.single         = irq_affinity_online_cpu,
+               .teardown.single        = NULL,
+       },
        [CPUHP_AP_PERF_ONLINE] = {
                .name                   = "perf:online",
                .startup.single         = perf_event_init_cpu,
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 0b093db3336b152ee51a6628198e1f4129833089..b7964e72ded7171ad23089e4134c9aaa1ec61a69 100644
@@ -83,6 +83,15 @@ static bool migrate_one_irq(struct irq_desc *desc)
                chip->irq_mask(d);
 
        if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+               /*
+                * If the interrupt is managed, then shut it down and leave
+                * the affinity untouched.
+                */
+               if (irqd_affinity_is_managed(d)) {
+                       irqd_set_managed_shutdown(d);
+                       irq_shutdown(desc);
+                       return false;
+               }
                affinity = cpu_online_mask;
                brokeaff = true;
        }
@@ -129,3 +138,39 @@ void irq_migrate_all_off_this_cpu(void)
                }
        }
 }
+
+static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
+{
+       struct irq_data *data = irq_desc_get_irq_data(desc);
+       const struct cpumask *affinity = irq_data_get_affinity_mask(data);
+
+       if (!irqd_affinity_is_managed(data) || !desc->action ||
+           !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
+               return;
+
+       if (irqd_is_managed_and_shutdown(data))
+               irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+       else
+               irq_set_affinity_locked(data, affinity, false);
+}
+
+/**
+ * irq_affinity_online_cpu - Restore affinity for managed interrupts
+ * @cpu:       Upcoming CPU for which interrupts should be restored
+ */
+int irq_affinity_online_cpu(unsigned int cpu)
+{
+       struct irq_desc *desc;
+       unsigned int irq;
+
+       irq_lock_sparse();
+       for_each_active_irq(irq) {
+               desc = irq_to_desc(irq);
+               raw_spin_lock_irq(&desc->lock);
+               irq_restore_affinity_of_irq(desc, cpu);
+               raw_spin_unlock_irq(&desc->lock);
+       }
+       irq_unlock_sparse();
+
+       return 0;
+}
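
With this applied, taking the last CPU of a managed vector's affinity mask
offline (for instance through /sys/devices/system/cpu/cpuN/online) parks
the interrupt in MANAGED_SHUTDOWN state instead of breaking its affinity
to all online CPUs, and the new irq/affinity:online cpuhp state restarts
it as soon as a CPU from the mask comes back online.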