// Protocol Buffers - Google's data interchange format
// Copyright 2012 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file is an internal atomic implementation, use atomicops.h instead.

#ifndef GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_X86_GCC_H_
#define GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_X86_GCC_H_

namespace google {
namespace protobuf {
namespace internal {

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86 CPU. Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
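
// Note: ATOMICOPS_COMPILER_BARRIER is a compiler-only barrier. The empty asm
// with a "memory" clobber keeps the compiler from reordering or caching
// memory accesses across it, but it emits no instruction, so by itself it
// does not constrain the CPU's memory ordering.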

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
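
// In the asm above, "=a" pins the output (the value actually observed at
// *ptr) to EAX, and the matching "0" constraint loads old_value into that
// same register, as cmpxchg requires the comparand to be in EAX. A minimal
// usage sketch -- a CAS retry loop computing an atomic maximum; the
// variables are hypothetical and not part of this header:
//
//   Atomic32 cur = NoBarrier_Load(&max_so_far);
//   while (candidate > cur) {
//     Atomic32 prev = NoBarrier_CompareAndSwap(&max_so_far, cur, candidate);
//     if (prev == cur) break;  // our value was installed
//     cur = prev;              // lost a race; retry against the fresh value
//   }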

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}
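
// xadd atomically adds the register operand into *ptr and leaves the prior
// value of *ptr in the register, hence old value + increment is the
// post-increment result. Illustrative sketch with a hypothetical counter
// (not part of this header); a real reference count would normally want the
// Barrier_ variant below for the final decrement:
//
//   Atomic32 refcount = 1;
//   NoBarrier_AtomicIncrement(&refcount, 1);          // take a reference
//   if (NoBarrier_AtomicIncrement(&refcount, -1) == 0) {
//     // last reference dropped
//   }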

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}
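
// Acquire semantics guarantee that memory operations issued after the CAS
// cannot be reordered ahead of it. The canonical use is taking a lock; a
// sketch with a hypothetical lock word (not part of this header):
//
//   while (Acquire_CompareAndSwap(&lock_word, 0, 1) != 0) {
//     // spin; another thread holds the lock
//   }
//   // critical section: reads/writes here cannot float above the CAS
//   Release_Store(&lock_word, 0);  // unlock, publishing the section's writes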

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}
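
// The fallback above works because a locked read-modify-write (the lock
// prefix is implicit for xchg) is a full memory barrier on x86, including
// pre-SSE2 parts such as the Pentium III that lack mfence.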

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
    // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}
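
// Release_Store and Acquire_Load are designed to be used as a pair for
// publishing data between threads. Sketch with hypothetical variables
// (not part of this header):
//
//   // Producer:
//   data = 42;                 // plain write
//   Release_Store(&ready, 1);  // 'data' is visible before 'ready' flips
//
//   // Consumer:
//   if (Acquire_Load(&ready) == 1) {
//     // guaranteed to observe data == 42 here
//   }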

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare. Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

#endif  // defined(__x86_64__)

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_X86_GCC_H_