/*
 * Adopted from jemalloc with this license:
 *
 * Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
 * All rights reserved.
 * Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
 * Copyright (C) 2009-2013 Facebook, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ATOMIC_OPS_H__
#define __ATOMIC_OPS_H__

#include <assert.h>

#if defined (__APPLE__)
#  include <libkern/OSAtomic.h>
#elif defined(_MSC_VER)
#  define NOGDI
#  ifndef NOMINMAX
#    define NOMINMAX
#  endif
#  define WIN32_LEAN_AND_MEAN
#  include <windows.h>
#elif defined(__arm__)
/* Attempt to fix a compilation error on the Debian armel kernel.
 * The ARMv7 architecture does have both 32-bit and 64-bit atomics, but
 * its gcc doesn't define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n.
 */
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_1
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_8
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_4
#endif

/* needed for int types */
#include "../../source/blender/blenlib/BLI_sys_types.h"
#include <stdlib.h>
#include <stddef.h>

/* little macro so inline keyword works */
#if defined(_MSC_VER)
#  define ATOMIC_INLINE static __forceinline
#else
#  if (defined(__APPLE__) && defined(__ppc__))
/* static inline __attribute__ here breaks osx ppc gcc42 build */
#    define ATOMIC_INLINE static __attribute__((always_inline))
#  else
#    define ATOMIC_INLINE static inline __attribute__((always_inline))
#  endif
#endif

/* This is becoming a bit nastier than was originally foreseen;
 * consider using autoconf-style detection instead.
 */
#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__s390x__) || defined(__powerpc64__) || defined(__aarch64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__alpha__) || defined(__mips64)
#  define LG_SIZEOF_PTR 3
#  define LG_SIZEOF_INT 2
#else
#  define LG_SIZEOF_PTR 2
#  define LG_SIZEOF_INT 2
#endif
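
/* For illustration: LG_SIZEOF_PTR is the base-2 logarithm of sizeof(void *).
 * On a 64-bit platform sizeof(void *) == 8 == (1 << 3), so LG_SIZEOF_PTR is 3;
 * on a 32-bit platform sizeof(void *) == 4 == (1 << 2), so it is 2.  The
 * asserts in the size_t and unsigned wrappers below rely on this relation.
 */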

/************************/
/* Function prototypes. */

#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
#endif

ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new);

ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b);

ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);

ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);

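/* Usage sketch (illustrative only; the names below are made up for the
 * example): a shared reference counter can be bumped from several threads
 * without a mutex, e.g.
 *
 *     static uint32_t users = 0;
 *
 *     void object_acquire(void) { atomic_add_uint32(&users, 1); }
 *     void object_release(void) { atomic_sub_uint32(&users, 1); }
 *
 * The add/sub helpers return the updated value, so a caller can also test
 * e.g. "if (atomic_sub_uint32(&users, 1) == 0)" to free on the last release.
 */
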
/******************************************************************************/
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
#  ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#  elif (defined(_MSC_VER))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        return InterlockedCompareExchange64((int64_t *)v, _new, old);
}
#  elif (defined(__APPLE__))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return (uint64_t)OSAtomicAdd64((int64_t)x, (int64_t *)p);
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return (uint64_t)OSAtomicAdd64(-((int64_t)x), (int64_t *)p);
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        uint64_t init_val = *v;
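        /* Note: OSAtomic has no compare-and-swap call that returns the previous
         * value, so the plain read above is only a best-effort snapshot of what
         * was stored before the swap attempt. */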
        OSAtomicCompareAndSwap64((int64_t)old, (int64_t)_new, (int64_t *)v);
        return init_val;
}
#  elif (defined(__amd64__) || defined(__x86_64__))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        uint64_t t = x;
        asm volatile (
            "lock; xaddq %0, %1;"
            : "+r" (t), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        /* xadd leaves the previous value in t; return the new value to match
         * the other implementations. */
        return t + x;
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        uint64_t t;
        x = (uint64_t)(-(int64_t)x);
        t = x;
        asm volatile (
            "lock; xaddq %0, %1;"
            : "+r" (t), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        return t + x;
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        uint64_t ret;
        asm volatile (
            "lock; cmpxchgq %2,%1"
            : "=a" (ret), "+m" (*v)
            : "r" (_new), "0" (old)
            : "memory");
        return ret;
}

#  elif (defined(JEMALLOC_ATOMIC9))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        /*
         * atomic_fetchadd_64() doesn't exist, but we only ever use this
         * function on LP64 systems, so atomic_fetchadd_long() will do.
         */
        assert(sizeof(uint64_t) == sizeof(unsigned long));

        return atomic_fetchadd_long(p, (unsigned long)x) + x;
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        assert(sizeof(uint64_t) == sizeof(unsigned long));

        return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x;
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        assert(sizeof(uint64_t) == sizeof(unsigned long));

        return atomic_cmpset_long(v, old, _new);
}
#  elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#  else
#    error "Missing implementation for 64-bit atomic operations"
#  endif
#endif

/******************************************************************************/
/* 32-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#elif (defined(_MSC_VER))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return InterlockedExchangeAdd((long *)p, (long)x) + x;
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return InterlockedExchangeAdd((long *)p, -((long)x)) - x;
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return InterlockedCompareExchange((long *)v, _new, old);
}
#elif (defined(__APPLE__))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return (uint32_t)OSAtomicAdd32((int32_t)x, (int32_t *)p);
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return (uint32_t)OSAtomicAdd32(-((int32_t)x), (int32_t *)p);
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        uint32_t init_val = *v;
        OSAtomicCompareAndSwap32((int32_t)old, (int32_t)_new, (int32_t *)v);
        return init_val;
}
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        uint32_t t = x;
        asm volatile (
            "lock; xaddl %0, %1;"
            : "+r" (t), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        /* xadd leaves the previous value in t; return the new value to match
         * the other implementations. */
        return t + x;
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        uint32_t t;
        x = (uint32_t)(-(int32_t)x);
        t = x;
        asm volatile (
            "lock; xaddl %0, %1;"
            : "+r" (t), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        return t + x;
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        uint32_t ret;
        asm volatile (
            "lock; cmpxchgl %2,%1"
            : "=a" (ret), "+m" (*v)
            : "r" (_new), "0" (old)
            : "memory");
        return ret;
}
#elif (defined(JEMALLOC_ATOMIC9))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return atomic_fetchadd_32(p, x) + x;
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x;
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return atomic_cmpset_32(v, old, _new);
}
#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#else
#  error "Missing implementation for 32-bit atomic operations"
#endif

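/* Usage sketch (illustrative only; atomic_max_uint32 is a hypothetical helper,
 * not part of this header): the cas functions return the value observed in the
 * target before the attempted swap, which enables the usual retry loop, e.g.
 * atomically raising a shared value to at least `x`:
 *
 *     void atomic_max_uint32(uint32_t *p, uint32_t x)
 *     {
 *         uint32_t prev = *p;
 *         while (prev < x) {
 *             uint32_t observed = atomic_cas_uint32(p, prev, x);
 *             if (observed == prev) {
 *                 break;  // swap succeeded
 *             }
 *             prev = observed;  // another thread changed *p, retry
 *         }
 *     }
 */
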
/******************************************************************************/
/* 8-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
ATOMIC_INLINE uint8_t
atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
        return __sync_fetch_and_and(p, b);
}
#elif (defined(_MSC_VER))
#include <intrin.h>
#pragma intrinsic(_InterlockedAnd8)
ATOMIC_INLINE uint8_t
atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
        return InterlockedAnd8((char *)p, (char)b);
#else
        return _InterlockedAnd8((char *)p, (char)b);
#endif
}
#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1)
ATOMIC_INLINE uint8_t
atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
        return __sync_fetch_and_and(p, b);
}
#else
#  error "Missing implementation for 8-bit atomic operations"
#endif

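/* Usage sketch (illustrative only; `flags` and FLAG_DIRTY are hypothetical):
 * atomic_fetch_and_and_uint8() returns the previous byte value, so a flag bit
 * can be cleared and tested in one atomic step:
 *
 *     uint8_t prev = atomic_fetch_and_and_uint8(&flags, (uint8_t)~FLAG_DIRTY);
 *     if (prev & FLAG_DIRTY) {
 *         // the flag was still set when we cleared it
 *     }
 */
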
/******************************************************************************/
/* size_t operations. */
ATOMIC_INLINE size_t
atomic_add_z(size_t *p, size_t x)
{
        assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 3)
        return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 2)
        return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
#endif
}

ATOMIC_INLINE size_t
atomic_sub_z(size_t *p, size_t x)
{
        assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 3)
        return (size_t)atomic_add_uint64((uint64_t *)p,
                                         (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_PTR == 2)
        return (size_t)atomic_add_uint32((uint32_t *)p,
                                         (uint32_t)-((int32_t)x));
#endif
}

ATOMIC_INLINE size_t
atomic_cas_z(size_t *v, size_t old, size_t _new)
{
        assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 3)
        return (size_t)atomic_cas_uint64((uint64_t *)v,
                                         (uint64_t)old,
                                         (uint64_t)_new);
#elif (LG_SIZEOF_PTR == 2)
        return (size_t)atomic_cas_uint32((uint32_t *)v,
                                         (uint32_t)old,
                                         (uint32_t)_new);
#endif
}

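/* Usage sketch (illustrative only; `mem_in_use` and tracked_malloc() are
 * hypothetical): the size_t wrappers pick the right fixed-width operation for
 * the platform, e.g. for tracking the total size of live allocations:
 *
 *     static size_t mem_in_use = 0;
 *
 *     void *tracked_malloc(size_t len)
 *     {
 *         atomic_add_z(&mem_in_use, len);
 *         return malloc(len);
 *     }
 */
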
/******************************************************************************/
/* unsigned operations. */
ATOMIC_INLINE unsigned
atomic_add_u(unsigned *p, unsigned x)
{
        assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);

#if (LG_SIZEOF_INT == 3)
        return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 2)
        return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
#endif
}

ATOMIC_INLINE unsigned
atomic_sub_u(unsigned *p, unsigned x)
{
        assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);

#if (LG_SIZEOF_INT == 3)
        return (unsigned)atomic_add_uint64((uint64_t *)p,
                                           (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_INT == 2)
        return (unsigned)atomic_add_uint32((uint32_t *)p,
                                           (uint32_t)-((int32_t)x));
#endif
}

ATOMIC_INLINE unsigned
atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
{
        assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);

        /* Dispatch on the size of `unsigned` (LG_SIZEOF_INT), not the pointer
         * size, to match the assert above. */
#if (LG_SIZEOF_INT == 3)
        return (unsigned)atomic_cas_uint64((uint64_t *)v,
                                           (uint64_t)old,
                                           (uint64_t)_new);
#elif (LG_SIZEOF_INT == 2)
        return (unsigned)atomic_cas_uint32((uint32_t *)v,
                                           (uint32_t)old,
                                           (uint32_t)_new);
#endif
}

#endif /* __ATOMIC_OPS_H__ */