diff --git a/intern/atomic/atomic_ops.h b/intern/atomic/atomic_ops.h index 38670be56fd..578cfb76eb6 100644 --- a/intern/atomic/atomic_ops.h +++ b/intern/atomic/atomic_ops.h @@ -82,6 +82,12 @@ ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x); ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x); ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x); ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new); + +ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x); +ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x); +ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x); +ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x); +ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new); #endif ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x); @@ -92,9 +98,23 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x); ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x); ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x); +ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x); +ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x); +ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new); + +ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x); +ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x); +ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x); + ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b); ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b); +ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b); +ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b); + +ATOMIC_INLINE char atomic_fetch_and_or_char(char *p, char b); +ATOMIC_INLINE char atomic_fetch_and_and_char(char *p, char b); + ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x); ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x); ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x); diff --git a/intern/atomic/intern/atomic_ops_ext.h b/intern/atomic/intern/atomic_ops_ext.h index 34158a0b45e..51275e2b36b 100644 --- a/intern/atomic/intern/atomic_ops_ext.h +++ b/intern/atomic/intern/atomic_ops_ext.h @@ -179,6 +179,18 @@ ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsig #endif } +/******************************************************************************/ +/* Char operations. */ +ATOMIC_INLINE char atomic_fetch_and_or_char(char *p, char b) +{ + return (char)atomic_fetch_and_or_uint8((uint8_t *)p, (uint8_t)b); +} + +ATOMIC_INLINE char atomic_fetch_and_and_char(char *p, char b) +{ + return (char)atomic_fetch_and_and_uint8((uint8_t *)p, (uint8_t)b); +} + /******************************************************************************/ /* Pointer operations. */ diff --git a/intern/atomic/intern/atomic_ops_msvc.h b/intern/atomic/intern/atomic_ops_msvc.h index 034ac1e3e53..ab31b3b789a 100644 --- a/intern/atomic/intern/atomic_ops_msvc.h +++ b/intern/atomic/intern/atomic_ops_msvc.h @@ -43,6 +43,7 @@ /******************************************************************************/ /* 64-bit operations. */ #if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) +/* Unsigned */ ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x) { return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x; @@ -67,10 +68,37 @@ ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x) { return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)); } + +/* Signed */ +ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x) +{ + return InterlockedExchangeAdd64(p, x) + x; +} + +ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x) +{ + return InterlockedExchangeAdd64(p, -x) - x; +} + +ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new) +{ + return InterlockedCompareExchange64(v, _new, old); +} + +ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x) +{ + return InterlockedExchangeAdd64(p, x); +} + +ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x) +{ + return InterlockedExchangeAdd64(p, -x); +} #endif /******************************************************************************/ /* 32-bit operations. */ +/* Unsigned */ ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) { return InterlockedExchangeAdd(p, x) + x; @@ -101,9 +129,41 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x) return InterlockedAnd((long *)p, x); } +/* Signed */ +ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x) +{ + return InterlockedExchangeAdd(p, x) + x; +} + +ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x) +{ + return InterlockedExchangeAdd(p, -x) - x; +} + +ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new) +{ + return InterlockedCompareExchange(v, _new, old); +} + +ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x) +{ + return InterlockedExchangeAdd(p, x); +} + +ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x) +{ + return InterlockedOr(p, x); +} + +ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x) +{ + return InterlockedAnd(p, x); +} + /******************************************************************************/ /* 8-bit operations. */ +/* Unsigned */ #pragma intrinsic(_InterlockedAnd8) ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) { @@ -124,4 +184,25 @@ ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) #endif } +/* Signed */ +#pragma intrinsic(_InterlockedAnd8) +ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b) +{ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) + return InterlockedAnd8((char *)p, (char)b); +#else + return _InterlockedAnd8((char *)p, (char)b); +#endif +} + +#pragma intrinsic(_InterlockedOr8) +ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b) +{ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) + return InterlockedOr8((char *)p, (char)b); +#else + return _InterlockedOr8((char *)p, (char)b); +#endif +} + #endif /* __ATOMIC_OPS_MSVC_H__ */ diff --git a/intern/atomic/intern/atomic_ops_unix.h b/intern/atomic/intern/atomic_ops_unix.h index 0a3322ad2b1..783a30f743b 100644 --- a/intern/atomic/intern/atomic_ops_unix.h +++ b/intern/atomic/intern/atomic_ops_unix.h @@ -58,6 +58,7 @@ /* 64-bit operations. */ #if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) # if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +/* Unsigned */ ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x) { return __sync_add_and_fetch(p, x); @@ -82,7 +83,35 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne { return __sync_val_compare_and_swap(v, old, _new); } + +/* Signed */ +ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x) +{ + return __sync_add_and_fetch(p, x); +} + +ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x) +{ + return __sync_sub_and_fetch(p, x); +} + +ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x) +{ + return __sync_fetch_and_add(p, x); +} + +ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x) +{ + return __sync_fetch_and_sub(p, x); +} + +ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new) +{ + return __sync_val_compare_and_swap(v, old, _new); +} + # elif (defined(__amd64__) || defined(__x86_64__)) +/* Unsigned */ ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x) { asm volatile ( @@ -124,6 +153,49 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne : "memory"); return ret; } + +/* Signed */ +ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x) +{ + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x) +{ + x = -x; + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x) +{ + return atomic_fetch_and_add_int64(p, x) + x; +} + +ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x) +{ + return atomic_fetch_and_sub_int64(p, x) - x; +} + +ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new) +{ + int64_t ret; + asm volatile ( + "lock; cmpxchgq %2,%1" + : "=a" (ret), "+m" (*v) + : "r" (_new), "0" (old) + : "memory"); + return ret; +} # else # error "Missing implementation for 64-bit atomic operations" # endif @@ -132,6 +204,7 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne /******************************************************************************/ /* 32-bit operations. */ #if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) +/* Unsigned */ ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) { return __sync_add_and_fetch(p, x); @@ -146,7 +219,25 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne { return __sync_val_compare_and_swap(v, old, _new); } + +/* Signed */ +ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x) +{ + return __sync_add_and_fetch(p, x); +} + +ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x) +{ + return __sync_sub_and_fetch(p, x); +} + +ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new) +{ + return __sync_val_compare_and_swap(v, old, _new); +} + #elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +/* Unsigned */ ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) { uint32_t ret = x; @@ -155,18 +246,18 @@ ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x) : "+r" (ret), "=m" (*p) /* Outputs. */ : "m" (*p) /* Inputs. */ ); - return ret+x; + return ret + x; } ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x) { - ret = (uint32_t)(-(int32_t)x); + uint32_t ret = (uint32_t)(-(int32_t)x); asm volatile ( "lock; xaddl %0, %1;" : "+r" (ret), "=m" (*p) /* Outputs. */ : "m" (*p) /* Inputs. */ ); - return ret-x; + return ret - x; } ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) @@ -179,11 +270,47 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne : "memory"); return ret; } + +/* Signed */ +ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x) +{ + int32_t ret = x; + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (ret), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return ret + x; +} + +ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x) +{ + int32_t ret = -x; + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (ret), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return ret - x; +} + +ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new) +{ + int32_t ret; + asm volatile ( + "lock; cmpxchgl %2,%1" + : "=a" (ret), "+m" (*v) + : "r" (_new), "0" (old) + : "memory"); + return ret; +} + #else # error "Missing implementation for 32-bit atomic operations" #endif #if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) +/* Unsigned */ ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x) { return __sync_fetch_and_add(p, x); @@ -199,6 +326,22 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x) return __sync_fetch_and_and(p, x); } +/* Signed */ +ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x) +{ + return __sync_fetch_and_add(p, x); +} + +ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x) +{ + return __sync_fetch_and_or(p, x); +} + +ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x) +{ + return __sync_fetch_and_and(p, x); +} + #else # error "Missing implementation for 32-bit atomic operations" #endif @@ -206,6 +349,7 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x) /******************************************************************************/ /* 8-bit operations. */ #if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1)) +/* Unsigned */ ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) { return __sync_fetch_and_and(p, b); @@ -214,6 +358,17 @@ ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) { return __sync_fetch_and_or(p, b); } + +/* Signed */ +ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b) +{ + return __sync_fetch_and_and(p, b); +} +ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b) +{ + return __sync_fetch_and_or(p, b); +} + #else # error "Missing implementation for 8-bit atomic operations" #endif diff --git a/source/blender/blenkernel/intern/library.c b/source/blender/blenkernel/intern/library.c index 46d5a725959..99dd6f86549 100644 --- a/source/blender/blenkernel/intern/library.c +++ b/source/blender/blenkernel/intern/library.c @@ -2407,10 +2407,10 @@ void BKE_library_filepath_set(Library *lib, const char *filepath) void BKE_id_tag_set_atomic(ID *id, int tag) { - atomic_fetch_and_or_uint32((uint32_t *)&id->tag, tag); + atomic_fetch_and_or_int32(&id->tag, tag); } void BKE_id_tag_clear_atomic(ID *id, int tag) { - atomic_fetch_and_and_uint32((uint32_t *)&id->tag, ~tag); + atomic_fetch_and_and_int32(&id->tag, ~tag); } diff --git a/source/blender/blenkernel/intern/object.c b/source/blender/blenkernel/intern/object.c index 95394f6be63..72dbd545f8c 100644 --- a/source/blender/blenkernel/intern/object.c +++ b/source/blender/blenkernel/intern/object.c @@ -327,14 +327,14 @@ void BKE_object_free_derived_caches(Object *ob) Mesh *me = ob->data; if (me && me->bb) { - atomic_fetch_and_or_uint32((uint *)&me->bb->flag, BOUNDBOX_DIRTY); + atomic_fetch_and_or_int32(&me->bb->flag, BOUNDBOX_DIRTY); } } else if (ELEM(ob->type, OB_SURF, OB_CURVE, OB_FONT)) { Curve *cu = ob->data; if (cu && cu->bb) { - atomic_fetch_and_or_uint32((uint *)&cu->bb->flag, BOUNDBOX_DIRTY); + atomic_fetch_and_or_int32(&cu->bb->flag, BOUNDBOX_DIRTY); } } diff --git a/source/blender/blenkernel/intern/pbvh.c b/source/blender/blenkernel/intern/pbvh.c index 6fc89eb778a..585a18cdad5 100644 --- a/source/blender/blenkernel/intern/pbvh.c +++ b/source/blender/blenkernel/intern/pbvh.c @@ -1008,9 +1008,7 @@ static void pbvh_update_normals_store_task_cb(void *userdata, const int n) MVert *mvert = &bvh->verts[v]; /* mvert is shared between nodes, hence between threads. */ - if (atomic_fetch_and_and_uint8( - (uint8_t *)&mvert->flag, (uint8_t)~ME_VERT_PBVH_UPDATE) & ME_VERT_PBVH_UPDATE) - { + if (atomic_fetch_and_and_char(&mvert->flag, (char)~ME_VERT_PBVH_UPDATE) & ME_VERT_PBVH_UPDATE) { normalize_v3(vnors[v]); normal_float_to_short_v3(mvert->no, vnors[v]); } diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c index e050f3148b8..d69241c3737 100644 --- a/source/blender/blenlib/intern/task.c +++ b/source/blender/blenlib/intern/task.c @@ -1003,8 +1003,7 @@ BLI_INLINE bool parallel_range_next_iter_get( ParallelRangeState * __restrict state, int * __restrict iter, int * __restrict count) { - uint32_t uval = atomic_fetch_and_add_uint32((uint32_t *)(&state->iter), state->chunk_size); - int previter = *(int32_t *)&uval; + int previter = atomic_fetch_and_add_int32(&state->iter, state->chunk_size); *iter = previter; *count = max_ii(0, min_ii(state->chunk_size, state->stop - previter)); @@ -1124,7 +1123,7 @@ static void task_parallel_range_ex( } num_tasks = min_ii(num_tasks, (stop - start) / state.chunk_size); - atomic_fetch_and_add_uint32((uint32_t *)(&state.iter), 0); + atomic_fetch_and_add_int32(&state.iter, 0); if (use_userdata_chunk) { userdata_chunk_array = MALLOCA(userdata_chunk_size * num_tasks);