x86/cpufeature: Macrofy inline assembly code to work around GCC inlining bugs
Author: Nadav Amit <namit@vmware.com>
Fri, 5 Oct 2018 20:27:17 +0000 (13:27 -0700)
Committer: Ingo Molnar <mingo@kernel.org>
Sat, 6 Oct 2018 13:52:16 +0000 (15:52 +0200)
As described in:

  77b0bf55bc67: ("kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs")

GCC's inlining heuristics are broken with common asm() patterns used in
kernel code, resulting in the effective disabling of inlining.

The workaround is to set an assembly macro and call it from the inline
assembly block - which is pretty pointless indirection in the static_cpu_has()
case, but is worth it to improve overall inlining quality.

The patch slightly increases the kernel size:

      text     data     bss      dec     hex  filename
  18162879 10226256 2957312 31346447 1de4f0f  ./vmlinux before
  18163528 10226300 2957312 31347140 1de51c4  ./vmlinux after (+693)

And enables the inlining of functions such as free_ldt_pgtables().

Tested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20181005202718.229565-3-namit@vmware.com
Link: https://lore.kernel.org/lkml/20181003213100.189959-10-namit@vmware.com/T/#u
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/cpufeature.h
arch/x86/kernel/macros.S

index aced6c9290d6f96cdaf4eaadab3dd3835d80b94a..7d442722ef241b684c348dc47b6be916d4728a3d 100644 (file)
@@ -2,10 +2,10 @@
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
-#include <asm/processor.h>
-
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
+#include <asm/processor.h>
 #include <asm/asm.h>
 #include <linux/bitops.h>
 
@@ -161,37 +161,10 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-       asm_volatile_goto("1: jmp 6f\n"
-                "2:\n"
-                ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-                        "((5f-4f) - (2b-1b)),0x90\n"
-                "3:\n"
-                ".section .altinstructions,\"a\"\n"
-                " .long 1b - .\n"              /* src offset */
-                " .long 4f - .\n"              /* repl offset */
-                " .word %P[always]\n"          /* always replace */
-                " .byte 3b - 1b\n"             /* src len */
-                " .byte 5f - 4f\n"             /* repl len */
-                " .byte 3b - 2b\n"             /* pad len */
-                ".previous\n"
-                ".section .altinstr_replacement,\"ax\"\n"
-                "4: jmp %l[t_no]\n"
-                "5:\n"
-                ".previous\n"
-                ".section .altinstructions,\"a\"\n"
-                " .long 1b - .\n"              /* src offset */
-                " .long 0\n"                   /* no replacement */
-                " .word %P[feature]\n"         /* feature bit */
-                " .byte 3b - 1b\n"             /* src len */
-                " .byte 0\n"                   /* repl len */
-                " .byte 0\n"                   /* pad len */
-                ".previous\n"
-                ".section .altinstr_aux,\"ax\"\n"
-                "6:\n"
-                " testb %[bitnum],%[cap_byte]\n"
-                " jnz %l[t_yes]\n"
-                " jmp %l[t_no]\n"
-                ".previous\n"
+       asm_volatile_goto("STATIC_CPU_HAS bitnum=%[bitnum] "
+                         "cap_byte=\"%[cap_byte]\" "
+                         "feature=%P[feature] t_yes=%l[t_yes] "
+                         "t_no=%l[t_no] always=%P[always]"
                 : : [feature]  "i" (bit),
                     [always]   "i" (X86_FEATURE_ALWAYS),
                     [bitnum]   "i" (1 << (bit & 7)),
@@ -226,5 +199,44 @@ t_no:
 #define CPU_FEATURE_TYPEVAL            boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
                                        boot_cpu_data.x86_model
 
-#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+#else /* __ASSEMBLY__ */
+
+.macro STATIC_CPU_HAS bitnum:req cap_byte:req feature:req t_yes:req t_no:req always:req
+1:
+       jmp 6f
+2:
+       .skip -(((5f-4f) - (2b-1b)) > 0) * ((5f-4f) - (2b-1b)),0x90
+3:
+       .section .altinstructions,"a"
+       .long 1b - .            /* src offset */
+       .long 4f - .            /* repl offset */
+       .word \always           /* always replace */
+       .byte 3b - 1b           /* src len */
+       .byte 5f - 4f           /* repl len */
+       .byte 3b - 2b           /* pad len */
+       .previous
+       .section .altinstr_replacement,"ax"
+4:
+       jmp \t_no
+5:
+       .previous
+       .section .altinstructions,"a"
+       .long 1b - .            /* src offset */
+       .long 0                 /* no replacement */
+       .word \feature          /* feature bit */
+       .byte 3b - 1b           /* src len */
+       .byte 0                 /* repl len */
+       .byte 0                 /* pad len */
+       .previous
+       .section .altinstr_aux,"ax"
+6:
+       testb \bitnum,\cap_byte
+       jnz \t_yes
+       jmp \t_no
+       .previous
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
 #endif /* _ASM_X86_CPUFEATURE_H */
index 7baa40d5bf16d0335b26e2d32c4a44d0d0802fd9..bf8b9c93e2552863c67e1c741d78dbe7a0f35e14 100644 (file)
@@ -12,3 +12,4 @@
 #include <asm/bug.h>
 #include <asm/paravirt.h>
 #include <asm/asm.h>
+#include <asm/cpufeature.h>