ARC: LLOCK/SCOND based spin_lock
author Vineet Gupta <vgupta@synopsys.com>
Tue, 14 Jul 2015 12:25:05 +0000 (17:55 +0530)
committer Vineet Gupta <vgupta@synopsys.com>
Tue, 4 Aug 2015 03:56:33 +0000 (09:26 +0530)
Current spin_lock uses EXchange instruction to implement the atomic test
and set of lock location (reads orig value and ST 1). This however forces
the cacheline into exclusive state (because of the ST) and concurrent
loops in multiple cores will bounce the line around between cores.

Instead, use LLOCK/SCOND to implement the atomic test and set, which is
better as the line stays in shared state while the lock is spinning on LLOCK.

The real motivation for this change, however, is to make way for future
changes in atomics to implement delayed retry (with backoff).
An initial experiment with delayed retry in atomics combined with the
original EX-based spinlock was a total disaster (it broke even LMBench), as
struct sock has a cache line shared by an atomic_t and a spinlock. The
tight spinning on the lock caused the atomic retry to keep backing off
such that it would never finish.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
arch/arc/include/asm/spinlock.h

index e1651df6a93d5bc8ab0af3a833c7c6ffd23acacc..4f6c90a0a68a426b35d616ec9ff477dd10d0d6d6 100644 (file)
 #define arch_spin_unlock_wait(x) \
        do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
 
+#ifdef CONFIG_ARC_HAS_LLSC
+/*
+ * Acquire @lock with an LLOCK/SCOND retry loop.
+ *
+ * The loop keeps re-reading lock->slock for as long as it holds
+ * __ARCH_SPIN_LOCK_LOCKED__, then tries to store __ARCH_SPIN_LOCK_LOCKED__
+ * into it, restarting from the load whenever the bnz following SCOND is
+ * taken. The function returns only after the loop has fallen through,
+ * i.e. after the store has gone in. The asm is bracketed by smp_mb() on
+ * both sides.
+ */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+       unsigned int val;
+
+       smp_mb();
+       /*
+        * val is an early-clobber output ("=&r"): llock writes it while the
+        * slock and LOCKED input registers are still needed by the later
+        * instructions, so it must not share a register with either of them.
+        * "memory" makes the asm a compiler barrier; "cc" declares that the
+        * condition flags are modified.
+        */
+       __asm__ __volatile__(
+       "1:     llock   %[val], [%[slock]]      \n"     /* val = lock->slock */
+       "       breq    %[val], %[LOCKED], 1b   \n"     /* spin while LOCKED */
+       "       scond   %[LOCKED], [%[slock]]   \n"     /* acquire */
+       "       bnz     1b                      \n"     /* SCOND did not store: start over */
+       "                                       \n"
+       : [val]         "=&r"   (val)
+       : [slock]       "r"     (&(lock->slock)),
+         [LOCKED]      "r"     (__ARCH_SPIN_LOCK_LOCKED__)
+       : "memory", "cc");
+       /* barrier issued after the lock word has been written */
+       smp_mb();
+}
+
+/*
+ * Try to take @lock without spinning on a lock that is already held.
+ *
+ * Returns 1 if the lock was taken successfully, 0 if lock->slock was
+ * found to be __ARCH_SPIN_LOCK_LOCKED__ when read. The load is retried
+ * only when the bnz following SCOND is taken; observing a LOCKED value
+ * bails out straight away.
+ */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+       unsigned int val, got_it = 0;
+
+       smp_mb();
+       /*
+        * got_it is a read-write operand ("+&r"): it is initialised to 0 in
+        * C and the asm overwrites it with 1 only on the success path, so
+        * the bail-out branch to label 4 relies on the initial 0 being
+        * preserved. val is a scratch early-clobber output.
+        */
+       __asm__ __volatile__(
+       "1:     llock   %[val], [%[slock]]      \n"     /* val = lock->slock */
+       "       breq    %[val], %[LOCKED], 4f   \n"     /* already LOCKED, just bail */
+       "       scond   %[LOCKED], [%[slock]]   \n"     /* acquire */
+       "       bnz     1b                      \n"     /* SCOND did not store: start over */
+       "       mov     %[got_it], 1            \n"     /* lock is ours */
+       "4:                                     \n"     /* bail-out target, got_it still 0 */
+       "                                       \n"
+       : [val]         "=&r"   (val),
+         [got_it]      "+&r"   (got_it)
+       : [slock]       "r"     (&(lock->slock)),
+         [LOCKED]      "r"     (__ARCH_SPIN_LOCK_LOCKED__)
+       : "memory", "cc");
+       /* issued on both the success and the bail-out path */
+       smp_mb();
+
+       return got_it;
+}
+/*
+ * Release @lock by writing __ARCH_SPIN_LOCK_UNLOCKED__ to lock->slock.
+ *
+ * Unlike the acquire paths this uses a plain C store rather than an
+ * LLOCK/SCOND sequence; the store is bracketed by smp_mb() on both sides.
+ */
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+       smp_mb();       /* barrier before the releasing store */
+
+       lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+       smp_mb();       /* barrier after the releasing store */
+}
+
+#else  /* !CONFIG_ARC_HAS_LLSC */
+
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
-       unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+       unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
 
        /*
         * This smp_mb() is technically superfluous, we only need the one
@@ -33,7 +92,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
        __asm__ __volatile__(
        "1:     ex  %0, [%1]            \n"
        "       breq  %0, %2, 1b        \n"
-       : "+&r" (tmp)
+       : "+&r" (val)
        : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
        : "memory");
 
@@ -48,26 +107,27 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
        smp_mb();
 }
 
+/* 1 - lock taken successfully */
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
-       unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
+       unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
 
        smp_mb();
 
        __asm__ __volatile__(
        "1:     ex  %0, [%1]            \n"
-       : "+r" (tmp)
+       : "+r" (val)
        : "r"(&(lock->slock))
        : "memory");
 
        smp_mb();
 
-       return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
+       return (val == __ARCH_SPIN_LOCK_UNLOCKED__);
 }
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
-       unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
+       unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__;
 
        /*
         * RELEASE barrier: given the instructions avail on ARCv2, full barrier
@@ -77,7 +137,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 
        __asm__ __volatile__(
        "       ex  %0, [%1]            \n"
-       : "+r" (tmp)
+       : "+r" (val)
        : "r"(&(lock->slock))
        : "memory");
 
@@ -88,6 +148,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
        smp_mb();
 }
 
+#endif
+
 /*
  * Read-write spinlocks, allowing multiple readers but only one writer.
  *