powerpc/64: Implement clear_bit_unlock_is_negative_byte()
author Nicholas Piggin <npiggin@gmail.com>
Tue, 3 Jan 2017 18:58:28 +0000 (04:58 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Sat, 18 Feb 2017 03:40:01 +0000 (14:40 +1100)
Commit b91e1302ad9b8 ("mm: optimize PageWaiters bit use for
unlock_page()") added a special bitop function to speed up
unlock_page(). Implement this for 64-bit powerpc.

This improves the unlock_page() core code from this:

li 9,1
lwsync
1: ldarx 10,0,3,0
andc 10,10,9
stdcx. 10,0,3
bne- 1b
ori 2,2,0
ld 9,0(3)
andi. 10,9,0x80
beqlr
li 4,0
b wake_up_page_bit

To this:

li 10,1
lwsync
1: ldarx 9,0,3,0
andc 9,9,10
stdcx. 9,0,3
bne- 1b
andi. 10,9,0x80
beqlr
li 4,0
b wake_up_page_bit

In a test of elapsed time for dd writing into 16GB of already-dirty
pagecache on a POWER8 with 4K pages (i.e. one unlock_page() call per
4kB written), this patch reduced overhead by 1.1%:

    N           Min           Max        Median           Avg        Stddev
x  19         2.578         2.619         2.594         2.595         0.011
+  19         2.552         2.592         2.564         2.565         0.008
Difference at 95.0% confidence
-0.030  +/- 0.006
-1.142% +/- 0.243%

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Made 64-bit only until I can test it properly on 32-bit]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/bitops.h

index 59abc620f8e8a10de4e7329993092af678f8778e..73eb794d6163811c45729984f0d9fb06bbde31a9 100644 (file)
@@ -154,6 +154,34 @@ static __inline__ int test_and_change_bit(unsigned long nr,
        return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
 }
 
+#ifdef CONFIG_PPC64
+static __inline__ unsigned long clear_bit_unlock_return_word(int nr,
+                                               volatile unsigned long *addr)
+{      /* Clears the BIT_MASK(nr) bit in word addr + BIT_WORD(nr) after a release barrier; returns that word's prior value. */
+       unsigned long old, t;   /* old: word as loaded; t: word with the mask bit cleared */
+       unsigned long *p = (unsigned long *)addr + BIT_WORD(nr);        /* word operated on */
+       unsigned long mask = BIT_MASK(nr);      /* bit(s) to clear in *p */
+
+       __asm__ __volatile__ (
+       PPC_RELEASE_BARRIER     /* shows up as lwsync in the commit-message listing */
+"1:"   PPC_LLARX(%0,0,%3,0) "\n"       /* old = *p with reservation (ldarx in the listing) */
+       "andc %1,%0,%2\n"       /* t = old & ~mask */
+       PPC405_ERR77(0,%3)      /* NOTE(review): presumably a PPC405 erratum workaround; no instruction appears here in the 64-bit listing - confirm */
+       PPC_STLCX "%1,0,%3\n"   /* conditionally store t to *p (stdcx. in the listing) */
+       "bne- 1b\n"     /* conditional store did not succeed: retry from the load at 1: */
+       : "=&r" (old), "=&r" (t)        /* early-clobber outputs: written before the inputs are last read */
+       : "r" (mask), "r" (p)
+       : "cc", "memory");      /* condition register is modified; "memory" stops the compiler caching memory values across the asm */
+
+       return old;     /* the word as loaded, i.e. before the bit was cleared */
+}
+
+/* Special function for mm/filemap.c: clears bit nr via clear_bit_unlock_return_word(); nonzero iff the BIT_MASK(PG_waiters) bit was set in the word returned (its value before the clear). */
+#define clear_bit_unlock_is_negative_byte(nr, addr)                    \
+       (clear_bit_unlock_return_word(nr, addr) & BIT_MASK(PG_waiters))
+
+#endif /* CONFIG_PPC64 */
+
 #include <asm-generic/bitops/non-atomic.h>
 
 static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)