Atomics: Add atomic_fetch_and_or_uint8() function
[blender-staging.git] / intern / atomic / atomic_ops.h
/*
 * Adopted from jemalloc with this license:
 *
 * Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
 * All rights reserved.
 * Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
 * Copyright (C) 2009-2013 Facebook, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ATOMIC_OPS_H__
#define __ATOMIC_OPS_H__

#include <assert.h>

#if defined(__APPLE__)
#  include <libkern/OSAtomic.h>
#elif defined(_MSC_VER)
#  define NOGDI
#  ifndef NOMINMAX
#    define NOMINMAX
#  endif
#  define WIN32_LEAN_AND_MEAN
#  include <windows.h>
#elif defined(__arm__)
/* Attempt to fix a compilation error on the Debian armel kernel.
 * The ARMv7 architecture does have both 32- and 64-bit atomics, but
 * its gcc does not define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n.
 */
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_1
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_4
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_8
#endif

/* needed for int types */
#include "../../source/blender/blenlib/BLI_sys_types.h"
#include <stdlib.h>
#include <stddef.h>

/* little macro so inline keyword works */
#if defined(_MSC_VER)
#  define ATOMIC_INLINE static __forceinline
#else
#  if (defined(__APPLE__) && defined(__ppc__))
/* static inline __attribute__ here breaks osx ppc gcc42 build */
#    define ATOMIC_INLINE static __attribute__((always_inline))
#  else
#    define ATOMIC_INLINE static inline __attribute__((always_inline))
#  endif
#endif

/* This is becoming a bit nastier than was originally foreseen;
 * consider using autoconf feature detection instead.
 */
#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__s390x__) || defined(__powerpc64__) || defined(__aarch64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__alpha__) || defined(__mips64)
#  define LG_SIZEOF_PTR 3
#  define LG_SIZEOF_INT 2
#else
#  define LG_SIZEOF_PTR 2
#  define LG_SIZEOF_INT 2
#endif

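/* LG_SIZEOF_PTR and LG_SIZEOF_INT are base-2 logarithms of sizeof(void *)
 * and sizeof(int). For example, on a typical 64-bit platform
 * sizeof(void *) == 8 == (1 << 3), hence LG_SIZEOF_PTR == 3. The size_t and
 * unsigned wrappers at the end of this file use these values to dispatch to
 * the 32-bit or 64-bit implementations.
 */
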
/************************/
/* Function prototypes. */

#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
#endif

ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new);

ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b);
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b);

ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);

ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x);
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);

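/* Return-value conventions, consistent across all implementations below:
 * - atomic_add_*() and atomic_sub_*() return the value of *p after the
 *   update.
 * - atomic_cas_*() store _new into *v only if *v equals old, and return the
 *   previous value of *v (a result equal to 'old' indicates success).
 * - atomic_fetch_and_or_uint8() and atomic_fetch_and_and_uint8() return the
 *   value of *p before the update.
 *
 * Illustrative example (the 'refcount' variable is hypothetical):
 *
 *   uint32_t refcount = 1;
 *   atomic_add_uint32(&refcount, 1);             // refcount is now 2
 *   if (atomic_sub_uint32(&refcount, 1) == 0) {  // returns 1 here
 *           // last reference released
 *   }
 */
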
/******************************************************************************/
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
#  ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#  elif (defined(_MSC_VER))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        return (uint64_t)InterlockedCompareExchange64((int64_t *)v, (int64_t)_new, (int64_t)old);
}
#  elif (defined(__APPLE__))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return (uint64_t)OSAtomicAdd64((int64_t)x, (int64_t *)p);
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return (uint64_t)OSAtomicAdd64(-((int64_t)x), (int64_t *)p);
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        /* OSAtomicCompareAndSwap64() only reports success or failure, so
         * emulate the return-previous-value contract: on success the
         * previous value was exactly 'old'; on failure re-read *v (note
         * that this re-read is not atomic with the failed swap). */
        if (OSAtomicCompareAndSwap64((int64_t)old, (int64_t)_new, (int64_t *)v)) {
                return old;
        }
        return *v;
}
#  elif (defined(__amd64__) || defined(__x86_64__))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        uint64_t t = x;
        asm volatile (
            "lock; xaddq %0, %1;"
            : "+r" (t), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        /* xadd leaves the previous value of *p in t; add x so that the
         * new value is returned, matching the other implementations. */
        return t + x;
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        uint64_t t;
        x = (uint64_t)(-(int64_t)x);
        t = x;
        asm volatile (
            "lock; xaddq %0, %1;"
            : "+r" (t), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        return t + x;
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        uint64_t ret;
        asm volatile (
            "lock; cmpxchgq %2,%1"
            : "=a" (ret), "+m" (*v)
            : "r" (_new), "0" (old)
            : "memory");
        return ret;
}

#  elif (defined(JEMALLOC_ATOMIC9))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        /*
         * atomic_fetchadd_64() doesn't exist, but we only ever use this
         * function on LP64 systems, so atomic_fetchadd_long() will do.
         */
        assert(sizeof(uint64_t) == sizeof(unsigned long));

        return atomic_fetchadd_long((unsigned long *)p, (unsigned long)x) + x;
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        assert(sizeof(uint64_t) == sizeof(unsigned long));

        return atomic_fetchadd_long((unsigned long *)p, (unsigned long)(-(long)x)) - x;
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        assert(sizeof(uint64_t) == sizeof(unsigned long));

        /* atomic_cmpset_long() only returns success or failure, so emulate
         * the return-previous-value contract: on success the previous value
         * was 'old'; on failure re-read *v (not atomic with the swap). */
        if (atomic_cmpset_long((unsigned long *)v, old, _new)) {
                return old;
        }
        return *v;
}
#  elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
ATOMIC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#  else
#    error "Missing implementation for 64-bit atomic operations"
#  endif
#endif

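/* Note: the 64-bit operations above are only compiled on platforms where
 * pointers or ints are 64-bit (see LG_SIZEOF_* above), so call sites on
 * other platforms must be guarded accordingly, e.g.:
 *
 *   #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
 *           atomic_add_uint64(&counter, 1);  // 'counter' is hypothetical
 *   #endif
 */
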
/******************************************************************************/
/* 32-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#elif (defined(_MSC_VER))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return InterlockedExchangeAdd((long *)p, (long)x) + x;
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return InterlockedExchangeAdd((long *)p, -((long)x)) - x;
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return (uint32_t)InterlockedCompareExchange((long *)v, (long)_new, (long)old);
}
#elif (defined(__APPLE__))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return (uint32_t)OSAtomicAdd32((int32_t)x, (int32_t *)p);
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return (uint32_t)OSAtomicAdd32(-((int32_t)x), (int32_t *)p);
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        /* As with the 64-bit version: OSAtomicCompareAndSwap32() only
         * reports success or failure, so reconstruct the previous value. */
        if (OSAtomicCompareAndSwap32((int32_t)old, (int32_t)_new, (int32_t *)v)) {
                return old;
        }
        return *v;
}
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        uint32_t t = x;
        asm volatile (
            "lock; xaddl %0, %1;"
            : "+r" (t), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        /* xadd leaves the previous value of *p in t; add x so that the
         * new value is returned, matching the other implementations. */
        return t + x;
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        uint32_t t;
        x = (uint32_t)(-(int32_t)x);
        t = x;
        asm volatile (
            "lock; xaddl %0, %1;"
            : "+r" (t), "=m" (*p) /* Outputs. */
            : "m" (*p) /* Inputs. */
            );
        return t + x;
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        uint32_t ret;
        asm volatile (
            "lock; cmpxchgl %2,%1"
            : "=a" (ret), "+m" (*v)
            : "r" (_new), "0" (old)
            : "memory");
        return ret;
}
#elif (defined(JEMALLOC_ATOMIC9))
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return atomic_fetchadd_32(p, x) + x;
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x;
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        /* atomic_cmpset_32() only returns success or failure, so emulate
         * the return-previous-value contract of the other implementations. */
        if (atomic_cmpset_32(v, old, _new)) {
                return old;
        }
        return *v;
}
#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)
ATOMIC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
        return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
        return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
        return __sync_val_compare_and_swap(v, old, _new);
}
#else
#  error "Missing implementation for 32-bit atomic operations"
#endif

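/* Illustrative usage: a compare-and-swap retry loop (a common pattern, not
 * part of this API) that atomically raises *v to at least x. The helper
 * name 'atomic_max_uint32' is hypothetical.
 *
 *   uint32_t atomic_max_uint32(uint32_t *v, uint32_t x)
 *   {
 *           uint32_t prev = *v;
 *           while (prev < x) {
 *                   uint32_t cur = atomic_cas_uint32(v, prev, x);
 *                   if (cur == prev) {
 *                           break;  // swap succeeded
 *                   }
 *                   prev = cur;  // another thread updated *v, retry
 *           }
 *           return prev;
 *   }
 */
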
/******************************************************************************/
/* 8-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
ATOMIC_INLINE uint8_t
atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{
        return __sync_fetch_and_or(p, b);
}

ATOMIC_INLINE uint8_t
atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
        return __sync_fetch_and_and(p, b);
}
#elif (defined(_MSC_VER))
#  include <intrin.h>
#  pragma intrinsic(_InterlockedAnd8, _InterlockedOr8)
ATOMIC_INLINE uint8_t
atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
        return InterlockedOr8((char *)p, (char)b);
#else
        return _InterlockedOr8((char *)p, (char)b);
#endif
}

ATOMIC_INLINE uint8_t
atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
        return InterlockedAnd8((char *)p, (char)b);
#else
        return _InterlockedAnd8((char *)p, (char)b);
#endif
}
#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1)
ATOMIC_INLINE uint8_t
atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{
        return __sync_fetch_and_or(p, b);
}

ATOMIC_INLINE uint8_t
atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
        return __sync_fetch_and_and(p, b);
}
#else
#  error "Missing implementation for 8-bit atomic operations"
#endif

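/* Illustrative usage: atomically set a flag bit and detect whether this
 * thread was the one that set it ('flags' and MY_FLAG are hypothetical).
 *
 *   #define MY_FLAG (1 << 3)
 *
 *   uint8_t prev = atomic_fetch_and_or_uint8(&flags, MY_FLAG);
 *   if ((prev & MY_FLAG) == 0) {
 *           // the flag was clear before this call; we set it first
 *   }
 */
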
/******************************************************************************/
/* size_t operations. */
ATOMIC_INLINE size_t
atomic_add_z(size_t *p, size_t x)
{
        assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 3)
        return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 2)
        return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
#endif
}

ATOMIC_INLINE size_t
atomic_sub_z(size_t *p, size_t x)
{
        assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 3)
        return (size_t)atomic_add_uint64((uint64_t *)p,
                                         (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_PTR == 2)
        return (size_t)atomic_add_uint32((uint32_t *)p,
                                         (uint32_t)-((int32_t)x));
#endif
}

ATOMIC_INLINE size_t
atomic_cas_z(size_t *v, size_t old, size_t _new)
{
        assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 3)
        return (size_t)atomic_cas_uint64((uint64_t *)v,
                                         (uint64_t)old,
                                         (uint64_t)_new);
#elif (LG_SIZEOF_PTR == 2)
        return (size_t)atomic_cas_uint32((uint32_t *)v,
                                         (uint32_t)old,
                                         (uint32_t)_new);
#endif
}

/******************************************************************************/
/* unsigned operations. */
ATOMIC_INLINE unsigned
atomic_add_u(unsigned *p, unsigned x)
{
        assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);

#if (LG_SIZEOF_INT == 3)
        return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 2)
        return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
#endif
}

ATOMIC_INLINE unsigned
atomic_sub_u(unsigned *p, unsigned x)
{
        assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);

#if (LG_SIZEOF_INT == 3)
        return (unsigned)atomic_add_uint64((uint64_t *)p,
                                           (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_INT == 2)
        return (unsigned)atomic_add_uint32((uint32_t *)p,
                                           (uint32_t)-((int32_t)x));
#endif
}

ATOMIC_INLINE unsigned
atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
{
        assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);

/* Dispatch on LG_SIZEOF_INT (not LG_SIZEOF_PTR): this function operates on
 * 'unsigned', matching atomic_add_u() and atomic_sub_u() above. */
#if (LG_SIZEOF_INT == 3)
        return (unsigned)atomic_cas_uint64((uint64_t *)v,
                                           (uint64_t)old,
                                           (uint64_t)_new);
#elif (LG_SIZEOF_INT == 2)
        return (unsigned)atomic_cas_uint32((uint32_t *)v,
                                           (uint32_t)old,
                                           (uint32_t)_new);
#endif
}

#endif /* __ATOMIC_OPS_H__ */