x86/tsc: Introduce early tsc clocksource
author Peter Zijlstra <peterz@infradead.org>
Fri, 22 Dec 2017 09:20:13 +0000 (10:20 +0100)
committer Thomas Gleixner <tglx@linutronix.de>
Sun, 14 Jan 2018 19:18:23 +0000 (20:18 +0100)
Without TSC_KNOWN_FREQ the TSC clocksource is registered so late that the
kernel first switches to the HPET. Using HPET on large CPU count machines is
undesirable.

Therefore register a tsc-early clocksource using the preliminary tsc_khz
from quick calibration. Then when the final TSC calibration is done, it
can switch to the tuned frequency.

The only notable problem is that the real tsc clocksource must be marked
with CLOCK_SOURCE_VALID_FOR_HRES, otherwise it will not be selected when
unregistering tsc-early. tsc-early cannot be left registered, because then
the clocksource code would fall back to it when the tsc clocksource is
marked unstable later.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: len.brown@intel.com
Cc: rui.zhang@intel.com
Cc: Len Brown <lenb@kernel.org>
Link: https://lkml.kernel.org/r/20171222092243.431585460@infradead.org
arch/x86/kernel/tsc.c

index a2c9dd8bfc6f493c777582e848284d19a2c441a8..fb430273841023fd1ebf3507440cd5510f37b012 100644 (file)
@@ -1006,8 +1006,6 @@ static void __init detect_art(void)
 
 /* clocksource code */
 
-static struct clocksource clocksource_tsc;
-
 static void tsc_resume(struct clocksource *cs)
 {
        tsc_verify_tsc_adjust(true);
@@ -1058,12 +1056,31 @@ static void tsc_cs_tick_stable(struct clocksource *cs)
 /*
  * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
  */
+static struct clocksource clocksource_tsc_early = {
+       .name                   = "tsc-early",
+       .rating                 = 299,
+       .read                   = read_tsc,
+       .mask                   = CLOCKSOURCE_MASK(64),
+       .flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
+                                 CLOCK_SOURCE_MUST_VERIFY,
+       .archdata               = { .vclock_mode = VCLOCK_TSC },
+       .resume                 = tsc_resume,
+       .mark_unstable          = tsc_cs_mark_unstable,
+       .tick_stable            = tsc_cs_tick_stable,
+};
+
+/*
+ * Must mark VALID_FOR_HRES early such that when we unregister tsc_early
+ * this one will immediately take over. We will only register if TSC has
+ * been found good.
+ */
 static struct clocksource clocksource_tsc = {
        .name                   = "tsc",
        .rating                 = 300,
        .read                   = read_tsc,
        .mask                   = CLOCKSOURCE_MASK(64),
        .flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
+                                 CLOCK_SOURCE_VALID_FOR_HRES |
                                  CLOCK_SOURCE_MUST_VERIFY,
        .archdata               = { .vclock_mode = VCLOCK_TSC },
        .resume                 = tsc_resume,
@@ -1187,8 +1204,8 @@ static void tsc_refine_calibration_work(struct work_struct *work)
        int cpu;
 
        /* Don't bother refining TSC on unstable systems */
-       if (check_tsc_unstable())
-               goto out;
+       if (tsc_unstable)
+               return;
 
        /*
         * Since the work is started early in boot, we may be
@@ -1240,9 +1257,13 @@ static void tsc_refine_calibration_work(struct work_struct *work)
                set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
 
 out:
+       if (tsc_unstable)
+               return;
+
        if (boot_cpu_has(X86_FEATURE_ART))
                art_related_clocksource = &clocksource_tsc;
        clocksource_register_khz(&clocksource_tsc, tsc_khz);
+       clocksource_unregister(&clocksource_tsc_early);
 }
 
 
@@ -1251,13 +1272,11 @@ static int __init init_tsc_clocksource(void)
        if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
                return 0;
 
+       if (check_tsc_unstable())
+               return 0;
+
        if (tsc_clocksource_reliable)
                clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-       /* lower the rating if we already know its unstable: */
-       if (check_tsc_unstable()) {
-               clocksource_tsc.rating = 0;
-               clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
-       }
 
        if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
                clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
@@ -1270,6 +1289,7 @@ static int __init init_tsc_clocksource(void)
                if (boot_cpu_has(X86_FEATURE_ART))
                        art_related_clocksource = &clocksource_tsc;
                clocksource_register_khz(&clocksource_tsc, tsc_khz);
+               clocksource_unregister(&clocksource_tsc_early);
                return 0;
        }
 
@@ -1374,9 +1394,12 @@ void __init tsc_init(void)
 
        check_system_tsc_reliable();
 
-       if (unsynchronized_tsc())
+       if (unsynchronized_tsc()) {
                mark_tsc_unstable("TSCs unsynchronized");
+               return;
+       }
 
+       clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
        detect_art();
 }