timekeeping/vsyscall: Prevent math overflow in BOOTTIME update
authorThomas Gleixner <tglx@linutronix.de>
Thu, 22 Aug 2019 11:00:15 +0000 (13:00 +0200)
committerThomas Gleixner <tglx@linutronix.de>
Fri, 23 Aug 2019 00:12:11 +0000 (02:12 +0200)
The VDSO update for CLOCK_BOOTTIME has a overflow issue as it shifts the
nanoseconds based boot time offset left by the clocksource shift. That
overflows once the boot time offset becomes large enough. As a consequence
CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to
misbehave.

Fix it by storing a timespec64 representation of the offset when boot time
is adjusted and add that to the MONOTONIC base time value in the vdso data
page. Using the timespec64 representation avoids a 64bit division in the
update code.

Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation")
Reported-by: Chris Clayton <chris2553@googlemail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Chris Clayton <chris2553@googlemail.com>
Tested-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908221257580.1983@nanos.tec.linutronix.de
include/linux/timekeeper_internal.h
kernel/time/timekeeping.c
kernel/time/vsyscall.c

index 7acb953298a73118028ae04052fab9249529380f..84ff2844df2a8bc240b7186b7258fbef20c62940 100644 (file)
@@ -57,6 +57,7 @@ struct tk_read_base {
  * @cs_was_changed_seq:        The sequence number of clocksource change events
  * @next_leap_ktime:   CLOCK_MONOTONIC time value of a pending leap-second
  * @raw_sec:           CLOCK_MONOTONIC_RAW  time in seconds
+ * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
  * @cycle_interval:    Number of clock cycles in one NTP interval
  * @xtime_interval:    Number of clock shifted nano seconds in one NTP
  *                     interval.
@@ -84,6 +85,9 @@ struct tk_read_base {
  *
  * wall_to_monotonic is no longer the boot time, getboottime must be
  * used instead.
+ *
+ * @monotonic_to_boottime is a timespec64 representation of @offs_boot to
+ * accelerate the VDSO update for CLOCK_BOOTTIME.
  */
 struct timekeeper {
        struct tk_read_base     tkr_mono;
@@ -99,6 +103,7 @@ struct timekeeper {
        u8                      cs_was_changed_seq;
        ktime_t                 next_leap_ktime;
        u64                     raw_sec;
+       struct timespec64       monotonic_to_boot;
 
        /* The following members are for timekeeping internal use */
        u64                     cycle_interval;
index d911c8470149e87330838153abfd511f868fe383..ca69290bee2a3131358993e9a3bbc32c6a9a9231 100644 (file)
@@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
 static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 {
        tk->offs_boot = ktime_add(tk->offs_boot, delta);
+       /*
+        * Timespec representation for VDSO update to avoid 64bit division
+        * on every update.
+        */
+       tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
 }
 
 /*
index 8cf3596a4ce677ca787e214d6f33b32a6d6b4088..4bc37ac3bb0526994e2dcacd20369e6f2e98f8d3 100644 (file)
@@ -17,7 +17,7 @@ static inline void update_vdso_data(struct vdso_data *vdata,
                                    struct timekeeper *tk)
 {
        struct vdso_timestamp *vdso_ts;
-       u64 nsec;
+       u64 nsec, sec;
 
        vdata[CS_HRES_COARSE].cycle_last        = tk->tkr_mono.cycle_last;
        vdata[CS_HRES_COARSE].mask              = tk->tkr_mono.mask;
@@ -45,23 +45,27 @@ static inline void update_vdso_data(struct vdso_data *vdata,
        }
        vdso_ts->nsec   = nsec;
 
-       /* CLOCK_MONOTONIC_RAW */
-       vdso_ts         = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
-       vdso_ts->sec    = tk->raw_sec;
-       vdso_ts->nsec   = tk->tkr_raw.xtime_nsec;
+       /* Copy MONOTONIC time for BOOTTIME */
+       sec     = vdso_ts->sec;
+       /* Add the boot offset */
+       sec     += tk->monotonic_to_boot.tv_sec;
+       nsec    += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;
 
        /* CLOCK_BOOTTIME */
        vdso_ts         = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
-       vdso_ts->sec    = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-       nsec = tk->tkr_mono.xtime_nsec;
-       nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
-                      ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
+       vdso_ts->sec    = sec;
+
        while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
                nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
                vdso_ts->sec++;
        }
        vdso_ts->nsec   = nsec;
 
+       /* CLOCK_MONOTONIC_RAW */
+       vdso_ts         = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
+       vdso_ts->sec    = tk->raw_sec;
+       vdso_ts->nsec   = tk->tkr_raw.xtime_nsec;
+
        /* CLOCK_TAI */
        vdso_ts         = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
        vdso_ts->sec    = tk->xtime_sec + (s64)tk->tai_offset;