/*
 *  Akupara/threading/atomic_ops_gcc_x86.hpp
 *
 *
 *  Created by Udi Barzilai on 06/06.
 *  Copyright 2006 __MyCompanyName__. All rights reserved.
 *
 */
#if !defined(_AKUPARA_THREADING_ATOMIC_OPS_GCC_X86_HPP__INCLUDED_)
#	define _AKUPARA_THREADING_ATOMIC_OPS_GCC_X86_HPP__INCLUDED_
#	if defined(__GNUC__) && (defined(__i386__) ||  defined(__x86_64__))

namespace Akupara
{
	namespace threading
	{
		namespace atomic
		{
			namespace machine
			{
				const unsigned int k_bytes_per_cache_line = 64;  // this is true for P4 & K8


				// Flags for operations supported by this machine
				//-------------------------------------
				template<> struct implements_load         <4> : public true_type {};
				template<> struct implements_store        <4> : public true_type {};
				template<> struct implements_CAS          <4> : public true_type {};
				template<> struct implements_CAS          <8> : public true_type {};
				template<> struct implements_add          <4> : public true_type {};
				template<> struct implements_fetch_and_add<4> : public true_type {};
				//-------------------------------------
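				// Each specialization above advertises, by operand size in bytes, an
				// operation that this backend implements natively; widths that are
				// not listed are left unimplemented here.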



				// CAS
				//--------------------------------------------------------------------------------
				template<>
				inline bool compare_and_store<int64_t>(volatile int64_t * p, const int64_t & x, const int64_t & y)
				{
					register int32_t evh=int32_t(x>>32), evl=int32_t(x);
					register const int32_t nvh=int32_t(y>>32), nvl=int32_t(y);
					register bool result;
					__asm__ __volatile__ (
							"# CAS64\n"
							"	lock		           \n"
							"	cmpxchg8b %[location]  \n"
							"	sete %[result]         \n"
							: [location] "+m" (*p), [result] "=qm" (result), [expected_value_high] "+d" (evh), [expected_value_low] "+a" (evl)
							: [new_value_high] "c" (nvh), [new_value_low] "b" (nvl)
							: "cc"
					);
					return result;
				}
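				// cmpxchg8b compares EDX:EAX (the expected value, evh:evl) with the
				// 8-byte memory operand; on a match it stores ECX:EBX (the new value,
				// nvh:nvl) and sets ZF, otherwise it loads the operand's current
				// value into EDX:EAX. 'sete' then turns ZF into the boolean result.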
				//--------------------------------------------------------------------------------
				template<>
				inline bool compare_and_store<int32_t>(volatile int32_t *p, const int32_t & x, const int32_t & y)
				{
					register int32_t expected_value = x;
					register bool result;
					__asm__	__volatile__ (
							"# CAS32\n"
							"	lock                             \n"
							"	cmpxchgl %[new_value],%[operand] \n"
							"	sete %[result]                   \n"
							: [operand] "+m" (*p), [result] "=qm" (result), [expected_value] "+a" (expected_value)
							: [new_value] "r" (y)
							: "cc"
					);
					return result;
				}
				//--------------------------------------------------------------------------------
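
				// Illustrative sketch only (not part of the original Akupara API): a
				// typical retry loop built on compare_and_store<int32_t>. It adds 'x'
				// to '*p' atomically and returns the pre-add value, re-reading the
				// operand after each failed CAS. The name 'cas_fetch_and_add_sketch'
				// is hypothetical.
				inline int32_t cas_fetch_and_add_sketch(volatile int32_t * p, int32_t x)
				{
					int32_t observed = *p;
					while (!compare_and_store(p, observed, int32_t(observed + x)))
						observed = *p;  // another thread won the race; reload and retry
					return observed;
				}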




				// Atomic add/sub
				//--------------------------------------------------------------------------------
				inline void increment(volatile int32_t * operand_address)
				{
					__asm__ __volatile__ (
					"# atomic_increment_32\n"
					"	lock;             \n"
					"	incl %[operand];  \n"
					: [operand] "+m" (*operand_address)
					:
					: "cc"
					);
				}
				//--------------------------------------------------------------------------------
				inline void decrement(volatile int32_t * operand_address)
				{
					__asm__ __volatile__ (
					"# atomic_decrement_32\n"
					"	lock;             \n"
					"	decl %[operand];  \n"
					: [operand] "+m" (*operand_address)
					:
					: "cc"
					);
				}
				//--------------------------------------------------------------------------------
				template<>
				inline void add<int32_t>(volatile int32_t * operand_address, const int32_t & addend)
				{
					if (__builtin_constant_p(addend) && addend==1)
						increment(operand_address);
					else if (__builtin_constant_p(addend) && addend==-1)
						decrement(operand_address);
					else
						__asm__ __volatile__ (
						"# atomic_add_32               \n"
						"	lock                       \n"
						"	addl %[addend], %[operand] \n"
						: [operand] "+m" (*operand_address)
						: [addend] "ir" (addend)
						: "cc"
						);
				}
				//--------------------------------------------------------------------------------
				template<>
				inline void subtract<int32_t>(volatile int32_t * operand_address, const int32_t & subtrahend)
				{
					if (__builtin_constant_p(subtrahend) && subtrahend==1)
						decrement(operand_address);
					else if (__builtin_constant_p(subtrahend) && subtrahend==-1)
						increment(operand_address);
					else
						__asm__ __volatile__ (
						"# atomic_subtract_32              \n"
						"	lock                           \n"
						"	subl %[subtrahend], %[operand] \n"
						: [operand] "+m" (*operand_address)
						: [subtrahend] "ir" (subtrahend)
						: "cc"
						);
				}
				//--------------------------------------------------------------------------------
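				// Note: __builtin_constant_p lets GCC resolve the addend/subtrahend
				// tests above at compile time, so adding or subtracting a literal 1
				// compiles straight to the shorter 'lock incl'/'lock decl' forms,
				// while any other value takes the 'lock addl'/'lock subl' path with
				// no branch left in the generated code.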



				// Atomic fetch and add/sub
				//--------------------------------------------------------------------------------
				template<>
				inline int32_t fetch_and_add<int32_t>(volatile int32_t * operand_address, const int32_t & addend)
				{
					register int32_t addend_and_fetched = addend;
					__asm__ __volatile__ (
					"# atomic_fetch_and_add_32       \n"
					"	lock;                        \n"
					"	xaddl %[addend], %[operand]; \n"
					: [operand] "+m" (*operand_address), [addend] "+r" (addend_and_fetched)
					:
					: "cc"
					);
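					// xaddl swaps the register with the memory operand before storing
					// the sum, so addend_and_fetched now holds the operand's value
					// from before the addition.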
					return addend_and_fetched;
				}
				//--------------------------------------------------------------------------------
				template<>
				inline int32_t fetch_and_subtract<int32_t>(volatile int32_t * operand_address, const int32_t & subtrahend)
				{
					return fetch_and_add(operand_address, -subtrahend);
				}
				//--------------------------------------------------------------------------------
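
				// Illustrative only: a common use of fetch_and_subtract is reference
				// counting, where the thread that fetches a count of 1 (dropping it
				// to 0) must perform the cleanup. 'refcount', 'destroy' and 'object'
				// below are hypothetical names.
				//
				//     if (fetch_and_subtract(&refcount, 1) == 1)
				//         destroy(object);   // last reference released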




				// Memory barriers
				//--------------------------------------------------------------------------------
				inline void memory_barrier_readwrite()
				{
				#if _AKUPARA_X86_SSE_NOT_AVAILABLE
					__asm__ __volatile__ ("	lock; addl $0,0(%%esp); # memory_barrier_readwrite" : : : "memory");
				#else
					__asm__ __volatile__ ("	mfence;   # memory_barrier_readwrite" : : : "memory");
				#endif // _AKUPARA_X86_SSE_NOT_AVAILABLE
				}
				//--------------------------------------------------------------------------------
				inline void memory_barrier_read()
				{
				#if _AKUPARA_X86_SSE_NOT_AVAILABLE
					__asm__ __volatile__ ("	lock; addl $0,0(%%esp); # memory_barrier_read" : : : "memory");
				#else
					__asm__ __volatile__ ("	lfence;  # memory_barrier_read" : : : "memory");
				#endif // _AKUPARA_X86_SSE_NOT_AVAILABLE
				}
				//--------------------------------------------------------------------------------
				inline void memory_barrier_write()
				{
					__asm__ __volatile__ ("	sfence;  # memory_barrier_write" : : : "memory");
				}
				//--------------------------------------------------------------------------------
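
				// Note on the barrier instructions: mfence and lfence require SSE2,
				// and sfence requires SSE; that is why the first two fall back to the
				// classic 'lock; addl $0,0(%esp)' idiom when
				// _AKUPARA_X86_SSE_NOT_AVAILABLE is set (any LOCKed read-modify-write
				// serializes memory accesses on x86). On x86-64, SSE2 is always
				// present, so only the fence paths apply there.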

			} // namespace machine
		} // namespace atomic
	} // namespace threading
} // namespace Akupara

#	endif // defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
#endif // _AKUPARA_THREADING_ATOMIC_OPS_GCC_X86_HPP__INCLUDED_