Atomics: Use _InterlockedAnd8 directly for 32bit MSVC
[blender.git] / intern / atomic / atomic_ops.h
1 /*
2  * Adopted from jemalloc with this license:
3  *
4  * Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
5  * All rights reserved.
6  * Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
7  * Copyright (C) 2009-2013 Facebook, Inc.  All rights reserved.
8
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  * 1. Redistributions of source code must retain the above copyright notice(s),
12  *    this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright notice(s),
14  *    this list of conditions and the following disclaimer in the documentation
15  *    and/or other materials provided with the distribution.
16
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
20  * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
25  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
26  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #ifndef __ATOMIC_OPS_H__
30 #define __ATOMIC_OPS_H__
31
32 #include <assert.h>
33
34 #if defined (__APPLE__)
35 #  include <libkern/OSAtomic.h>
36 #elif defined(_MSC_VER)
37 #  define NOGDI
38 #  ifndef NOMINMAX
39 #    define NOMINMAX
40 #  endif
41 #  define WIN32_LEAN_AND_MEAN
42 #  include <windows.h>
43 #elif defined(__arm__)
44 /* Attempt to fix compilation error on Debian armel kernel.
45  * arm7 architecture does have both 32 and 64bit atomics, however
46  * its gcc doesn't have __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n defined.
47  */
48 #  define JE_FORCE_SYNC_COMPARE_AND_SWAP_8
49 #  define JE_FORCE_SYNC_COMPARE_AND_SWAP_4
50 #endif
51
52 /* needed for int types */
53 #include "../../source/blender/blenlib/BLI_sys_types.h"
54 #include <stdlib.h>
55 #include <stddef.h>
56
57 /* little macro so inline keyword works */
58 #if defined(_MSC_VER)
59 #  define ATOMIC_INLINE static __forceinline
60 #else
61 #  if (defined(__APPLE__) && defined(__ppc__))
62 /* static inline __attribute__ here breaks osx ppc gcc42 build */
63 #    define ATOMIC_INLINE static __attribute__((always_inline))
64 #  else
65 #    define ATOMIC_INLINE static inline __attribute__((always_inline))
66 #  endif
67 #endif
68
69 /* This is becoming a bit nastier than it was originally foreseen,
70  * consider using autoconfig detection instead.
71  */
72 #if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__s390x__) || defined(__powerpc64__) || defined(__aarch64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__alpha__) || defined(__mips64)
73 #  define LG_SIZEOF_PTR 3
74 #  define LG_SIZEOF_INT 2
75 #else
76 #  define LG_SIZEOF_PTR 2
77 #  define LG_SIZEOF_INT 2
78 #endif
79
80 /************************/
81 /* Function prototypes. */
82
83 #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
84 ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
85 ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
86 ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
87 #endif
88
89 ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
90 ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
91 ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new);
92
93 ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b);
94
95 ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x);
96 ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x);
97 ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
98
99 ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x);
100 ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x);
101 ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
102
103 /******************************************************************************/
104 /* 64-bit operations. */
105 #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
106 #  ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
107 ATOMIC_INLINE uint64_t
108 atomic_add_uint64(uint64_t *p, uint64_t x)
109 {
110         return __sync_add_and_fetch(p, x);
111 }
112
113 ATOMIC_INLINE uint64_t
114 atomic_sub_uint64(uint64_t *p, uint64_t x)
115 {
116         return __sync_sub_and_fetch(p, x);
117 }
118
119 ATOMIC_INLINE uint64_t
120 atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
121 {
122         return __sync_val_compare_and_swap(v, old, _new);
123 }
124 #elif (defined(_MSC_VER))
125 ATOMIC_INLINE uint64_t
126 atomic_add_uint64(uint64_t *p, uint64_t x)
127 {
128         return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
129 }
130
131 ATOMIC_INLINE uint64_t
132 atomic_sub_uint64(uint64_t *p, uint64_t x)
133 {
134         return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
135 }
136
137 ATOMIC_INLINE uint64_t
138 atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
139 {
140         return InterlockedCompareExchange64((int64_t *)v, _new, old);
141 }
142 #elif (defined(__APPLE__))
143 ATOMIC_INLINE uint64_t
144 atomic_add_uint64(uint64_t *p, uint64_t x)
145 {
146         return (uint64_t)OSAtomicAdd64((int64_t)x, (int64_t *)p);
147 }
148
149 ATOMIC_INLINE uint64_t
150 atomic_sub_uint64(uint64_t *p, uint64_t x)
151 {
152         return (uint64_t)OSAtomicAdd64(-((int64_t)x), (int64_t *)p);
153 }
154
155 ATOMIC_INLINE uint64_t
156 atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
157 {
158         uint64_t init_val = *v;
159         OSAtomicCompareAndSwap64((int64_t)old, (int64_t)_new, (int64_t *)v);
160         return init_val;
161 }
162 #  elif (defined(__amd64__) || defined(__x86_64__))
163 ATOMIC_INLINE uint64_t
164 atomic_add_uint64(uint64_t *p, uint64_t x)
165 {
166         asm volatile (
167             "lock; xaddq %0, %1;"
168             : "+r" (x), "=m" (*p) /* Outputs. */
169             : "m" (*p) /* Inputs. */
170             );
171         return x;
172 }
173
174 ATOMIC_INLINE uint64_t
175 atomic_sub_uint64(uint64_t *p, uint64_t x)
176 {
177         x = (uint64_t)(-(int64_t)x);
178         asm volatile (
179             "lock; xaddq %0, %1;"
180             : "+r" (x), "=m" (*p) /* Outputs. */
181             : "m" (*p) /* Inputs. */
182             );
183         return x;
184 }
185
186 ATOMIC_INLINE uint64_t
187 atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
188 {
189         uint64_t ret;
190         asm volatile (
191             "lock; cmpxchgq %2,%1"
192             : "=a" (ret), "+m" (*v)
193             : "r" (_new), "0" (old)
194             : "memory");
195         return ret;
196 }
197
198 #  elif (defined(JEMALLOC_ATOMIC9))
199 ATOMIC_INLINE uint64_t
200 atomic_add_uint64(uint64_t *p, uint64_t x)
201 {
202         /*
203          * atomic_fetchadd_64() doesn't exist, but we only ever use this
204          * function on LP64 systems, so atomic_fetchadd_long() will do.
205          */
206         assert(sizeof(uint64_t) == sizeof(unsigned long));
207
208         return atomic_fetchadd_long(p, (unsigned long)x) + x;
209 }
210
211 ATOMIC_INLINE uint64_t
212 atomic_sub_uint64(uint64_t *p, uint64_t x)
213 {
214         assert(sizeof(uint64_t) == sizeof(unsigned long));
215
216         return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x;
217 }
218
219 ATOMIC_INLINE uint64_t
220 atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
221 {
222         assert(sizeof(uint64_t) == sizeof(unsigned long));
223
224         return atomic_cmpset_long(v, old, _new);
225 }
226 #  elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
227 ATOMIC_INLINE uint64_t
228 atomic_add_uint64(uint64_t *p, uint64_t x)
229 {
230         return __sync_add_and_fetch(p, x);
231 }
232
233 ATOMIC_INLINE uint64_t
234 atomic_sub_uint64(uint64_t *p, uint64_t x)
235 {
236         return __sync_sub_and_fetch(p, x);
237 }
238
239 ATOMIC_INLINE uint64_t
240 atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
241 {
242         return __sync_val_compare_and_swap(v, old, _new);
243 }
244 #  else
245 #    error "Missing implementation for 64-bit atomic operations"
246 #  endif
247 #endif
248
249 /******************************************************************************/
250 /* 32-bit operations. */
251 #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
252 ATOMIC_INLINE uint32_t
253 atomic_add_uint32(uint32_t *p, uint32_t x)
254 {
255         return __sync_add_and_fetch(p, x);
256 }
257
258 ATOMIC_INLINE uint32_t
259 atomic_sub_uint32(uint32_t *p, uint32_t x)
260 {
261         return __sync_sub_and_fetch(p, x);
262 }
263
264 ATOMIC_INLINE uint32_t
265 atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
266 {
267    return __sync_val_compare_and_swap(v, old, _new);
268 }
269 #elif (defined(_MSC_VER))
270 ATOMIC_INLINE uint32_t
271 atomic_add_uint32(uint32_t *p, uint32_t x)
272 {
273         return InterlockedExchangeAdd(p, x) + x;
274 }
275
276 ATOMIC_INLINE uint32_t
277 atomic_sub_uint32(uint32_t *p, uint32_t x)
278 {
279         return InterlockedExchangeAdd(p, -((int32_t)x)) - x;
280 }
281
282 ATOMIC_INLINE uint32_t
283 atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
284 {
285         return InterlockedCompareExchange((long *)v, _new, old);
286 }
287 #elif (defined(__APPLE__))
288 ATOMIC_INLINE uint32_t
289 atomic_add_uint32(uint32_t *p, uint32_t x)
290 {
291         return (uint32_t)OSAtomicAdd32((int32_t)x, (int32_t *)p);
292 }
293
294 ATOMIC_INLINE uint32_t
295 atomic_sub_uint32(uint32_t *p, uint32_t x)
296 {
297         return (uint32_t)OSAtomicAdd32(-((int32_t)x), (int32_t *)p);
298 }
299
300 ATOMIC_INLINE uint32_t
301 atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
302 {
303         uint32_t init_val = *v;
304         OSAtomicCompareAndSwap32((int32_t)old, (int32_t)_new, (int32_t *)v);
305         return init_val;
306 }
307 #elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
308 ATOMIC_INLINE uint32_t
309 atomic_add_uint32(uint32_t *p, uint32_t x)
310 {
311         asm volatile (
312             "lock; xaddl %0, %1;"
313             : "+r" (x), "=m" (*p) /* Outputs. */
314             : "m" (*p) /* Inputs. */
315             );
316         return x;
317 }
318
319 ATOMIC_INLINE uint32_t
320 atomic_sub_uint32(uint32_t *p, uint32_t x)
321 {
322         x = (uint32_t)(-(int32_t)x);
323         asm volatile (
324             "lock; xaddl %0, %1;"
325             : "+r" (x), "=m" (*p) /* Outputs. */
326             : "m" (*p) /* Inputs. */
327             );
328         return x;
329 }
330
331 ATOMIC_INLINE uint32_t
332 atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
333 {
334         uint32_t ret;
335         asm volatile (
336             "lock; cmpxchgl %2,%1"
337             : "=a" (ret), "+m" (*v)
338             : "r" (_new), "0" (old)
339             : "memory");
340         return ret;
341 }
342 #elif (defined(JEMALLOC_ATOMIC9))
343 ATOMIC_INLINE uint32_t
344 atomic_add_uint32(uint32_t *p, uint32_t x)
345 {
346         return atomic_fetchadd_32(p, x) + x;
347 }
348
349 ATOMIC_INLINE uint32_t
350 atomic_sub_uint32(uint32_t *p, uint32_t x)
351 {
352         return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x;
353 }
354
355 ATOMIC_INLINE uint32_t
356 atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
357 {
358         return atomic_cmpset_32(v, old, _new);
359 }
360 #elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)
361 ATOMIC_INLINE uint32_t
362 atomic_add_uint32(uint32_t *p, uint32_t x)
363 {
364         return __sync_add_and_fetch(p, x);
365 }
366
367 ATOMIC_INLINE uint32_t
368 atomic_sub_uint32(uint32_t *p, uint32_t x)
369 {
370         return __sync_sub_and_fetch(p, x);
371 }
372
373 ATOMIC_INLINE uint32_t
374 atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
375 {
376         return __sync_val_compare_and_swap(v, old, _new);
377 }
378 #else
379 #  error "Missing implementation for 32-bit atomic operations"
380 #endif
381
382 /******************************************************************************/
383 /* 8-bit operations. */
384 #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
385 ATOMIC_INLINE uint8_t
386 atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
387 {
388         return __sync_fetch_and_and(p, b);
389 }
390 #elif (defined(_MSC_VER))
391 #include <intrin.h>
392 #pragma intrinsic(_InterlockedAnd8)
393 ATOMIC_INLINE uint8_t
394 atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
395 {
396 #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
397         return InterlockedAnd8((char *)p, (char)b);
398 #else
399         return _InterlockedAnd8((char *)p, (char)b);
400 #endif
401 }
402 #else
403 #  error "Missing implementation for 8-bit atomic operations"
404 #endif
405
406 /******************************************************************************/
407 /* size_t operations. */
408 ATOMIC_INLINE size_t
409 atomic_add_z(size_t *p, size_t x)
410 {
411         assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
412
413 #if (LG_SIZEOF_PTR == 3)
414         return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
415 #elif (LG_SIZEOF_PTR == 2)
416         return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
417 #endif
418 }
419
420 ATOMIC_INLINE size_t
421 atomic_sub_z(size_t *p, size_t x)
422 {
423         assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
424
425 #if (LG_SIZEOF_PTR == 3)
426         return (size_t)atomic_add_uint64((uint64_t *)p,
427                                          (uint64_t)-((int64_t)x));
428 #elif (LG_SIZEOF_PTR == 2)
429         return (size_t)atomic_add_uint32((uint32_t *)p,
430                                          (uint32_t)-((int32_t)x));
431 #endif
432 }
433
434 ATOMIC_INLINE size_t
435 atomic_cas_z(size_t *v, size_t old, size_t _new)
436 {
437         assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
438
439 #if (LG_SIZEOF_PTR == 3)
440         return (size_t)atomic_cas_uint64((uint64_t *)v,
441                                          (uint64_t)old,
442                                          (uint64_t)_new);
443 #elif (LG_SIZEOF_PTR == 2)
444         return (size_t)atomic_cas_uint32((uint32_t *)v,
445                                          (uint32_t)old,
446                                          (uint32_t)_new);
447 #endif
448 }
449
450 /******************************************************************************/
451 /* unsigned operations. */
452 ATOMIC_INLINE unsigned
453 atomic_add_u(unsigned *p, unsigned x)
454 {
455         assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
456
457 #if (LG_SIZEOF_INT == 3)
458         return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
459 #elif (LG_SIZEOF_INT == 2)
460         return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
461 #endif
462 }
463
464 ATOMIC_INLINE unsigned
465 atomic_sub_u(unsigned *p, unsigned x)
466 {
467         assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
468
469 #if (LG_SIZEOF_INT == 3)
470         return (unsigned)atomic_add_uint64((uint64_t *)p,
471                                            (uint64_t)-((int64_t)x));
472 #elif (LG_SIZEOF_INT == 2)
473         return (unsigned)atomic_add_uint32((uint32_t *)p,
474                                            (uint32_t)-((int32_t)x));
475 #endif
476 }
477
478 ATOMIC_INLINE unsigned
479 atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
480 {
481         assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
482
483 #if (LG_SIZEOF_PTR == 3)
484         return (unsigned)atomic_cas_uint64((uint64_t *)v,
485                                            (uint64_t)old,
486                                            (uint64_t)_new);
487 #elif (LG_SIZEOF_PTR == 2)
488         return (unsigned)atomic_cas_uint32((uint32_t *)v,
489                                            (uint32_t)old,
490                                            (uint32_t)_new);
491 #endif
492 }
493
494 #endif /* __ATOMIC_OPS_H__ */