/*
 *  Akupara/threading/atomic_ops_gcc_x86.hpp
 *
 *  Created by Udi Barzilai on 06/06.
 *  Copyright 2006 __MyCompanyName__. All rights reserved.
 *
 */
#if !defined(_AKUPARA_THREADING_ATOMIC_OPS_GCC_X86_HPP__INCLUDED_)
#   define _AKUPARA_THREADING_ATOMIC_OPS_GCC_X86_HPP__INCLUDED_
#   if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

namespace Akupara
{
    namespace threading
    {
        namespace atomic
        {
            namespace machine
            {
                const unsigned int k_bytes_per_cache_line = 64;  // this is true for P4 & K8

                // Flags for operations supported by this machine
                //-------------------------------------
                template<> struct implements_load          <4> : public true_type {};
                template<> struct implements_store         <4> : public true_type {};
                template<> struct implements_CAS           <4> : public true_type {};
                template<> struct implements_CAS           <8> : public true_type {};
                template<> struct implements_add           <4> : public true_type {};
                template<> struct implements_fetch_and_add <4> : public true_type {};
                //-------------------------------------

                // CAS
                //--------------------------------------------------------------------------------
                template<>
                inline bool compare_and_store(volatile int64_t * p, const int64_t & x, const int64_t & y)
                {
                    register int32_t evh = int32_t(x >> 32), evl = int32_t(x);
                    register const int32_t nvh = int32_t(y >> 32), nvl = int32_t(y);
                    register bool result;
                    __asm__ __volatile__ (
                        "# CAS64                      \n"
                        "    lock                     \n"
                        "    cmpxchg8b %[location]    \n"
                        "    sete %[result]           \n"
                        : [location]            "+m" (*p),
                          [result]              "=qm" (result),
                          [expected_value_high] "+d" (evh),
                          [expected_value_low]  "+a" (evl)
                        : [new_value_high]      "c" (nvh),
                          [new_value_low]       "b" (nvl)
                        : "cc"
                        );
                    return result;
                }
                //--------------------------------------------------------------------------------
                template<>
                inline bool compare_and_store(volatile int32_t * p, const int32_t & x, const int32_t & y)
                {
                    register int32_t expected_value = x;
                    register bool result;
                    __asm__ __volatile__ (
                        "# CAS32                                \n"
                        "    lock                               \n"
                        "    cmpxchgl %[new_value],%[operand]   \n"
                        "    sete %[result]                     \n"
                        : [operand]        "+m" (*p),
                          [result]         "=qm" (result),
                          [expected_value] "+a" (expected_value)
                        : [new_value]      "r" (y)
                        : "cc"
                        );
                    return result;
                }
                //--------------------------------------------------------------------------------
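                // Usage note (illustrative sketch, not part of the original source): the CAS
                // specializations above return true only when *p still held the expected value x
                // and was atomically replaced with y. A typical compare-and-swap retry loop built
                // on top of them might look like the hypothetical helper below; the plain read of
                // *p is allowed to race because the CAS re-validates it before committing.
                //
                //     inline int32_t fetch_and_multiply(volatile int32_t * p, int32_t factor)
                //     {
                //         int32_t old_value, new_value;
                //         do {
                //             old_value = *p;                  // racy snapshot of the current value
                //             new_value = old_value * factor;  // compute the desired update
                //         } while (!compare_and_store(p, old_value, new_value));
                //         return old_value;                    // value observed before the update
                //     }
                //--------------------------------------------------------------------------------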

                // Atomic add/sub
                //--------------------------------------------------------------------------------
                inline void increment(volatile int32_t * operand_address)
                {
                    __asm__ __volatile__ (
                        "# atomic_increment_32    \n"
                        "    lock                 \n"
                        "    incl %[operand]      \n"
                        : [operand] "+m" (*operand_address)
                        :
                        : "cc"
                        );
                }
                //--------------------------------------------------------------------------------
                inline void decrement(volatile int32_t * operand_address)
                {
                    __asm__ __volatile__ (
                        "# atomic_decrement_32    \n"
                        "    lock                 \n"
                        "    decl %[operand]      \n"
                        : [operand] "+m" (*operand_address)
                        :
                        : "cc"
                        );
                }
                //--------------------------------------------------------------------------------
                template<>
                inline void add(volatile int32_t * operand_address, const int32_t & addend)
                {
                    if (__builtin_constant_p(addend) && addend == 1)
                        increment(operand_address);
                    else if (__builtin_constant_p(addend) && addend == -1)
                        decrement(operand_address);
                    else
                        __asm__ __volatile__ (
                            "# atomic_add_32                 \n"
                            "    lock                        \n"
                            "    addl %[addend], %[operand]  \n"
                            : [operand] "+m" (*operand_address)
                            : [addend]  "ir" (addend)
                            : "cc"
                            );
                }
                //--------------------------------------------------------------------------------
                template<>
                inline void subtract(volatile int32_t * operand_address, const int32_t & subtrahend)
                {
                    if (__builtin_constant_p(subtrahend) && subtrahend == 1)
                        decrement(operand_address);
                    else if (__builtin_constant_p(subtrahend) && subtrahend == -1)
                        increment(operand_address);
                    else
                        __asm__ __volatile__ (
                            "# atomic_subtract_32                 \n"
                            "    lock                             \n"
                            "    subl %[subtrahend], %[operand]   \n"
                            : [operand]    "+m" (*operand_address)
                            : [subtrahend] "ir" (subtrahend)
                            : "cc"
                            );
                }
                //--------------------------------------------------------------------------------

                // Atomic fetch and add/sub
                //--------------------------------------------------------------------------------
                template<>
                inline int32_t fetch_and_add(volatile int32_t * operand_address, const int32_t & addend)
                {
                    register int32_t addend_and_fetched = addend;
                    __asm__ __volatile__ (
                        "# atomic_fetch_and_add_32        \n"
                        "    lock                         \n"
                        "    xaddl %[addend], %[operand]  \n"
                        : [operand] "+m" (*operand_address),
                          [addend]  "+r" (addend_and_fetched)
                        :
                        : "cc"
                        );
                    return addend_and_fetched;
                }
                //--------------------------------------------------------------------------------
                template<>
                inline int32_t fetch_and_subtract(volatile int32_t * operand_address, const int32_t & subtrahend)
                {
                    return fetch_and_add(operand_address, -subtrahend);
                }
                //--------------------------------------------------------------------------------

                // Memory barriers
                //--------------------------------------------------------------------------------
                inline void memory_barrier_readwrite()
                {
#if _AKUPARA_X86_SSE_NOT_AVAILABLE
                    __asm__ __volatile__ ("lock; addl $0,0(%%esp)  # memory_barrier_readwrite" : : : "memory");
#else
                    __asm__ __volatile__ ("mfence  # memory_barrier_readwrite" : : : "memory");
#endif // _AKUPARA_X86_SSE_NOT_AVAILABLE
                }
                //--------------------------------------------------------------------------------
                inline void memory_barrier_read()
                {
#if _AKUPARA_X86_SSE_NOT_AVAILABLE
                    __asm__ __volatile__ ("lock; addl $0,0(%%esp)  # memory_barrier_read" : : : "memory");
#else
                    __asm__ __volatile__ ("lfence  # memory_barrier_read" : : : "memory");
#endif // _AKUPARA_X86_SSE_NOT_AVAILABLE
                }
                //--------------------------------------------------------------------------------
                inline void memory_barrier_write()
                {
                    __asm__ __volatile__ ("sfence  # memory_barrier_write" : : : "memory");
                }
                //--------------------------------------------------------------------------------
            } // namespace machine
        } // namespace atomic
    } // namespace threading
} // namespace Akupara

#   endif // defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
#endif // _AKUPARA_THREADING_ATOMIC_OPS_GCC_X86_HPP__INCLUDED_
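
//--------------------------------------------------------------------------------
// Usage note (illustrative sketch, not part of the original source): fetch_and_add()
// and fetch_and_subtract() return the operand's value *before* the update, which is
// the usual building block for reference counting. The barrier placement below is
// one conventional choice layered on these primitives, not something this header
// mandates; release_reference() and destroy() are hypothetical names.
//
//     using namespace Akupara::threading::atomic::machine;
//
//     void release_reference(volatile int32_t * refcount_address)
//     {
//         // fetch_and_subtract returns the old count; 1 means this was the last reference
//         if (fetch_and_subtract(refcount_address, 1) == 1)
//         {
//             memory_barrier_readwrite();  // order prior accesses before teardown
//             destroy();                   // hypothetical cleanup of the shared object
//         }
//     }
//--------------------------------------------------------------------------------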