atomic_ops: fix typo in func names (uint32 instead of uint64).
[blender.git] intern/atomic/atomic_ops.h
/*
 * Adopted from jemalloc with this license:
 *
 * Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
 * All rights reserved.
 * Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
 * Copyright (C) 2009-2013 Facebook, Inc.  All rights reserved.

 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ATOMIC_OPS_H__
#define __ATOMIC_OPS_H__

#include <assert.h>

#if defined (__APPLE__)
#  include <libkern/OSAtomic.h>
#elif defined(_MSC_VER)
#  define NOGDI
#  ifndef NOMINMAX
#    define NOMINMAX
#  endif
#  define WIN32_LEAN_AND_MEAN
#  include <windows.h>
#elif defined(__arm__)
/* Attempt to fix compilation error on Debian armel kernel.
 * The arm7 architecture does have both 32 and 64 bit atomics, however
 * its gcc doesn't have __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n defined.
 */
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_8
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_4
#endif

/* needed for int types */
#include "../../source/blender/blenlib/BLI_sys_types.h"
#include <stdlib.h>
#include <stddef.h>

/* little macro so inline keyword works */
#if defined(_MSC_VER)
#  define ATOMIC_INLINE static __forceinline
#else
#  if (defined(__APPLE__) && defined(__ppc__))
/* static inline __attribute__ here breaks osx ppc gcc42 build */
#    define ATOMIC_INLINE static __attribute__((always_inline))
#  else
#    define ATOMIC_INLINE static inline __attribute__((always_inline))
#  endif
#endif

/* This is becoming a bit nastier than originally foreseen;
 * consider using autoconf-style detection instead.
 */
#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__s390x__) || defined(__powerpc64__) || defined(__aarch64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__alpha__) || defined(__mips64)
#  define LG_SIZEOF_PTR 3
#  define LG_SIZEOF_INT 2
#else
#  define LG_SIZEOF_PTR 2
#  define LG_SIZEOF_INT 2
#endif
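
/* Note on the LG_SIZEOF_* values (illustrative summary): they are the base-2
 * logarithms of the expected type sizes, which is exactly what the size_t and
 * unsigned wrappers below assert before casting to the fixed-width helpers:
 *
 *     assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
 *     assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
 *
 * On an LP64 platform LG_SIZEOF_PTR is 3 (8-byte size_t/pointers) while
 * LG_SIZEOF_INT stays 2 (4-byte unsigned int).
 */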

/************************/
/* Function prototypes. */

#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
#endif

ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new);

ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);

ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
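
/* Usage sketch (illustrative only; the helper below is hypothetical and not
 * part of this header).  The add/sub helpers follow __sync_add_and_fetch()
 * semantics and return the updated value; the cas helpers return the value
 * that was in memory before the swap, so success is detected by comparing
 * the return value against 'old'.  A typical CAS retry loop, here keeping a
 * shared maximum, would look like:
 *
 *     ATOMIC_INLINE uint32_t
 *     atomic_fetch_max_uint32(uint32_t *p, uint32_t x)
 *     {
 *             uint32_t prev = *p;
 *             while (prev < x) {
 *                     uint32_t seen = atomic_cas_uint32(p, prev, x);
 *                     if (seen == prev)
 *                             break;       (swap succeeded, *p is now >= x)
 *                     prev = seen;         (lost the race, retry with new value)
 *             }
 *             return prev;
 *     }
 */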

/******************************************************************************/
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
#  ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#  elif (defined(_MSC_VER))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        /* InterlockedExchangeAdd64() returns the value *p had before the add;
         * add x so all platforms return the new value, like __sync_add_and_fetch(). */
        return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        return InterlockedCompareExchange64((int64_t *)v, _new, old);
}
#  elif (defined(__APPLE__))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return (uint64_t)OSAtomicAdd64((int64_t)x, (int64_t *)p);
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return (uint64_t)OSAtomicAdd64(-((int64_t)x), (int64_t *)p);
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        uint64_t init_val = *v;
        OSAtomicCompareAndSwap64((int64_t)old, (int64_t)_new, (int64_t *)v);
        return init_val;
}
#  elif (defined(__amd64__) || defined(__x86_64__))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        uint64_t ret = x;

        asm volatile (
            "lock; xaddq %0, %1;"
            : "+r" (ret), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        /* xaddq leaves the previous value of *p in ret; add x so the new
         * value is returned, matching the __sync_add_and_fetch() paths. */
        return ret + x;
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        uint64_t ret = (uint64_t)(-(int64_t)x);

        asm volatile (
            "lock; xaddq %0, %1;"
            : "+r" (ret), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        return ret - x;
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        uint64_t ret;
        asm volatile (
            "lock; cmpxchgq %2,%1"
            : "=a" (ret), "+m" (*v)
            : "r" (_new), "0" (old)
            : "memory");
        return ret;
}

#  elif (defined(JEMALLOC_ATOMIC9))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        /*
         * atomic_fetchadd_64() doesn't exist, but we only ever use this
         * function on LP64 systems, so atomic_fetchadd_long() will do.
         */
        assert(sizeof(uint64_t) == sizeof(unsigned long));

        return atomic_fetchadd_long(p, (unsigned long)x) + x;
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        assert(sizeof(uint64_t) == sizeof(unsigned long));

        return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x;
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        assert(sizeof(uint64_t) == sizeof(unsigned long));

        return atomic_cmpset_long(v, old, _new);
}
#  elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#  else
#    error "Missing implementation for 64-bit atomic operations"
#  endif
#endif

/******************************************************************************/
/* 32-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#elif (defined(_MSC_VER))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        /* As with the 64-bit version, InterlockedExchangeAdd() returns the
         * previous value; add x so the new value is returned. */
        return InterlockedExchangeAdd((long *)p, x) + x;
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return InterlockedExchangeAdd((long *)p, -((int32_t)x)) - x;
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return InterlockedCompareExchange((long *)v, _new, old);
}
#elif (defined(__APPLE__))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return (uint32_t)OSAtomicAdd32((int32_t)x, (int32_t *)p);
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return (uint32_t)OSAtomicAdd32(-((int32_t)x), (int32_t *)p);
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        uint32_t init_val = *v;
        OSAtomicCompareAndSwap32((int32_t)old, (int32_t)_new, (int32_t *)v);
        return init_val;
}
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        uint32_t ret = x;

        asm volatile (
            "lock; xaddl %0, %1;"
            : "+r" (ret), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        /* xaddl leaves the previous value of *p in ret; add x so the new
         * value is returned, matching the __sync_add_and_fetch() paths. */
        return ret + x;
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        uint32_t ret = (uint32_t)(-(int32_t)x);

        asm volatile (
            "lock; xaddl %0, %1;"
            : "+r" (ret), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        return ret - x;
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        uint32_t ret;
        asm volatile (
            "lock; cmpxchgl %2,%1"
            : "=a" (ret), "+m" (*v)
            : "r" (_new), "0" (old)
            : "memory");
        return ret;
}
#elif (defined(JEMALLOC_ATOMIC9))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return atomic_fetchadd_32(p, x) + x;
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x;
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return atomic_cmpset_32(v, old, _new);
}
#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#else
#  error "Missing implementation for 32-bit atomic operations"
#endif

/******************************************************************************/
/* size_t operations. */
ATOMIC_INLINE size_t
atomic_add_z(size_t *p, size_t x)
{
        assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 3)
        return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 2)
        return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
#endif
}

ATOMIC_INLINE size_t
atomic_sub_z(size_t *p, size_t x)
{
        assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 3)
        return (size_t)atomic_add_uint64((uint64_t *)p,
                                         (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_PTR == 2)
        return (size_t)atomic_add_uint32((uint32_t *)p,
                                         (uint32_t)-((int32_t)x));
#endif
}

ATOMIC_INLINE size_t
atomic_cas_z(size_t *v, size_t old, size_t _new)
{
        assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 3)
        return (size_t)atomic_cas_uint64((uint64_t *)v,
                                         (uint64_t)old,
                                         (uint64_t)_new);
#elif (LG_SIZEOF_PTR == 2)
        return (size_t)atomic_cas_uint32((uint32_t *)v,
                                         (uint32_t)old,
                                         (uint32_t)_new);
#endif
}
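
/* Illustrative example (the names below are hypothetical, not part of this
 * header): the size_t wrappers are convenient for counters that track memory
 * amounts, since size_t matches the pointer width selected via LG_SIZEOF_PTR:
 *
 *     static size_t mem_in_use = 0;
 *
 *     void stats_alloc(size_t len) { atomic_add_z(&mem_in_use, len); }
 *     void stats_free(size_t len)  { atomic_sub_z(&mem_in_use, len); }
 */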

/******************************************************************************/
/* unsigned operations. */
ATOMIC_INLINE unsigned
atomic_add_u(unsigned *p, unsigned x)
{
        assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);

#if (LG_SIZEOF_INT == 3)
        return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 2)
        return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
#endif
}

ATOMIC_INLINE unsigned
atomic_sub_u(unsigned *p, unsigned x)
{
        assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);

#if (LG_SIZEOF_INT == 3)
        return (unsigned)atomic_add_uint64((uint64_t *)p,
                                           (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_INT == 2)
        return (unsigned)atomic_add_uint32((uint32_t *)p,
                                           (uint32_t)-((int32_t)x));
#endif
}

ATOMIC_INLINE unsigned
atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
{
        assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);

#if (LG_SIZEOF_INT == 3)
        return (unsigned)atomic_cas_uint64((uint64_t *)v,
                                           (uint64_t)old,
                                           (uint64_t)_new);
#elif (LG_SIZEOF_INT == 2)
        return (unsigned)atomic_cas_uint32((uint32_t *)v,
                                           (uint32_t)old,
                                           (uint32_t)_new);
#endif
}

#endif /* __ATOMIC_OPS_H__ */