Add a GCC asm "memory" clobber to primitives that imply a memory barrier.

This signifies to GCC that any potentially aliased memory must be flushed
before the operation, and re-read after the operation, so that reads or
modifications of such memory values in other threads will work as intended.
In effect, it makes these primitives work as memory barriers for the compiler
as well as the CPU. This is better and more correct than adding "volatile" to
variables.
---
 include/atomic/x86-gcc.h | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

Index: work-5.1-groupcommit/include/atomic/x86-gcc.h
===================================================================
--- work-5.1-groupcommit.orig/include/atomic/x86-gcc.h	2010-06-09 11:54:06.000000000 +0200
+++ work-5.1-groupcommit/include/atomic/x86-gcc.h	2010-06-09 11:54:10.000000000 +0200
@@ -38,17 +38,31 @@
 #define asm __asm__
 #endif
 
+/*
+  The atomic operations imply a memory barrier for the CPU, to ensure that all
+  prior writes are flushed from cache, and all subsequent reads reloaded into
+  cache.
+
+  We need to imply a similar memory barrier for the compiler, so that all
+  pending stores (to memory that may be aliased in other parts of the code)
+  will be flushed to memory before the operation, and all reads from such
+  memory be re-loaded. This is achieved by adding the "memory" pseudo-register
+  to the clobber list, see GCC documentation for more explanation.
+
+  The compiler and CPU memory barriers are needed to make sure changes in one
+  thread are made visible in another by the atomic operation.
+*/
 #ifndef MY_ATOMIC_NO_XADD
 #define make_atomic_add_body(S) \
-  asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
+  asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a): : "memory")
 #endif
 #define make_atomic_fas_body(S) \
-  asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a))
+  asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a) : : "memory")
 #define make_atomic_cas_body(S) \
   int ## S sav; \
   asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \
                : "+m" (*a), "=a" (sav), "=q" (ret) \
-               : "r" (set), "a" (*cmp)); \
+               : "r" (set), "a" (*cmp) : "memory"); \
   if (!ret) \
     *cmp= sav
 
@@ -63,9 +77,9 @@
 #define make_atomic_load_body(S) \
   ret=0; \
   asm volatile (LOCK_prefix "; cmpxchg %2, %0" \
                : "+m" (*a), "+a" (ret): "r" (ret))
+               : "+m" (*a), "+a" (ret) : "r" (ret) : "memory")
 #define make_atomic_store_body(S) \
-  asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v))
+  asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v) : : "memory")
 #endif
 
 /* TODO test on intel whether the below helps. on AMD it makes no difference */
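
For illustration only (not part of the patch): below is a minimal,
self-contained sketch of an atomic add written in the same style, showing how
the "memory" clobber makes the lock-prefixed xadd a barrier for the compiler
as well as for the CPU. It assumes GCC or clang on x86/x86-64; the function
name my_atomic_add32_demo and the variables in main() are made up for this
example and do not appear in the patched sources.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical demo function, modelled on make_atomic_add_body() above. */
static inline int32_t my_atomic_add32_demo(volatile int32_t *a, int32_t v)
{
  /*
    "+r" (v)  : v is input (the addend) and output (the old value of *a).
    "+m" (*a) : the target word is read and written in place.
    "memory"  : tells GCC the asm may read or write arbitrary memory, so
                pending stores to potentially aliased memory are flushed
                before the instruction and cached reads are reloaded after
                it; the lock prefix gives the matching CPU-level barrier.
  */
  __asm__ __volatile__ ("lock; xadd %0, %1"
                        : "+r" (v), "+m" (*a)
                        :
                        : "memory");
  return v;                        /* the value of *a before the addition */
}

int main(void)
{
  int32_t counter= 0;
  int32_t old= my_atomic_add32_demo(&counter, 5);
  printf("old=%d new=%d\n", (int) old, (int) counter);
  return 0;
}

Without the "memory" clobber, GCC would be free to keep other shared data
cached in registers across the asm statement, which is exactly the reordering
the patch is meant to rule out.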