Merge branch 'master' into blender2.8

This commit is contained in:
Bastien Montagne
2017-11-23 16:29:00 +01:00
8 changed files with 278 additions and 13 deletions

View File

@@ -82,6 +82,12 @@ ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new);
#endif
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x);
@@ -92,9 +98,23 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new);
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x);
ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b);
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b);
ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b);
ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b);
ATOMIC_INLINE char atomic_fetch_and_or_char(char *p, char b);
ATOMIC_INLINE char atomic_fetch_and_and_char(char *p, char b);
ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);

View File

@@ -179,6 +179,18 @@ ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsig
#endif
}
/******************************************************************************/
/* Char operations. */
ATOMIC_INLINE char atomic_fetch_and_or_char(char *p, char b)
{
return (char)atomic_fetch_and_or_uint8((uint8_t *)p, (uint8_t)b);
}
ATOMIC_INLINE char atomic_fetch_and_and_char(char *p, char b)
{
return (char)atomic_fetch_and_and_uint8((uint8_t *)p, (uint8_t)b);
}
/******************************************************************************/
/* Pointer operations. */

View File

@@ -43,6 +43,7 @@
/******************************************************************************/
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
@@ -67,10 +68,37 @@ ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x));
}
/* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, x) + x;
}
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, -x) - x;
}
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
return InterlockedCompareExchange64(v, _new, old);
}
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, x);
}
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, -x);
}
#endif
/******************************************************************************/
/* 32-bit operations. */
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return InterlockedExchangeAdd(p, x) + x;
@@ -101,9 +129,41 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x)
return InterlockedAnd((long *)p, x);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
return InterlockedExchangeAdd(p, x) + x;
}
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
return InterlockedExchangeAdd(p, -x) - x;
}
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
return InterlockedCompareExchange(v, _new, old);
}
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{
return InterlockedExchangeAdd(p, x);
}
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x)
{
return InterlockedOr(p, x);
}
ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
{
return InterlockedAnd(p, x);
}
/******************************************************************************/
/* 8-bit operations. */
/* Unsigned */
#pragma intrinsic(_InterlockedAnd8)
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
@@ -124,4 +184,25 @@ ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
#endif
}
/* Signed */
#pragma intrinsic(_InterlockedAnd8)
ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b)
{
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedAnd8((char *)p, (char)b);
#else
return _InterlockedAnd8((char *)p, (char)b);
#endif
}
#pragma intrinsic(_InterlockedOr8)
ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
{
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
return InterlockedOr8((char *)p, (char)b);
#else
return _InterlockedOr8((char *)p, (char)b);
#endif
}
#endif /* __ATOMIC_OPS_MSVC_H__ */

View File

@@ -58,6 +58,7 @@
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
# if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
return __sync_add_and_fetch(p, x);
@@ -82,7 +83,35 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
{
return __sync_val_compare_and_swap(v, old, _new);
}
/* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
return __sync_add_and_fetch(p, x);
}
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{
return __sync_sub_and_fetch(p, x);
}
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
return __sync_fetch_and_add(p, x);
}
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
return __sync_fetch_and_sub(p, x);
}
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
return __sync_val_compare_and_swap(v, old, _new);
}
# elif (defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
asm volatile (
@@ -124,6 +153,49 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
: "memory");
return ret;
}
/* Signed */
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return x;
}
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
x = -x;
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return x;
}
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
return atomic_fetch_and_add_int64(p, x) + x;
}
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{
return atomic_fetch_and_sub_int64(p, x) - x;
}
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
int64_t ret;
asm volatile (
"lock; cmpxchgq %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
}
# else
# error "Missing implementation for 64-bit atomic operations"
# endif
@@ -132,6 +204,7 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
/******************************************************************************/
/* 32-bit operations. */
#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
return __sync_add_and_fetch(p, x);
@@ -146,7 +219,25 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
{
return __sync_val_compare_and_swap(v, old, _new);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
return __sync_add_and_fetch(p, x);
}
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
return __sync_sub_and_fetch(p, x);
}
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
return __sync_val_compare_and_swap(v, old, _new);
}
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
uint32_t ret = x;
@@ -155,18 +246,18 @@ ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return ret+x;
return ret + x;
}
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
ret = (uint32_t)(-(int32_t)x);
uint32_t ret = (uint32_t)(-(int32_t)x);
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return ret-x;
return ret - x;
}
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
@@ -179,11 +270,47 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
: "memory");
return ret;
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
int32_t ret = x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return ret + x;
}
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
int32_t ret = -x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return ret - x;
}
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
int32_t ret;
asm volatile (
"lock; cmpxchgl %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
return ret;
}
#else
# error "Missing implementation for 32-bit atomic operations"
#endif
#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
{
return __sync_fetch_and_add(p, x);
@@ -199,6 +326,22 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x)
return __sync_fetch_and_and(p, x);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{
return __sync_fetch_and_add(p, x);
}
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x)
{
return __sync_fetch_and_or(p, x);
}
ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
{
return __sync_fetch_and_and(p, x);
}
#else
# error "Missing implementation for 32-bit atomic operations"
#endif
@@ -206,6 +349,7 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x)
/******************************************************************************/
/* 8-bit operations. */
#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1))
/* Unsigned */
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
return __sync_fetch_and_and(p, b);
@@ -214,6 +358,17 @@ ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{
return __sync_fetch_and_or(p, b);
}
/* Signed */
ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b)
{
return __sync_fetch_and_and(p, b);
}
ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
{
return __sync_fetch_and_or(p, b);
}
#else
# error "Missing implementation for 8-bit atomic operations"
#endif

View File

@@ -2407,10 +2407,10 @@ void BKE_library_filepath_set(Library *lib, const char *filepath)
void BKE_id_tag_set_atomic(ID *id, int tag)
{
atomic_fetch_and_or_uint32((uint32_t *)&id->tag, tag);
atomic_fetch_and_or_int32(&id->tag, tag);
}
void BKE_id_tag_clear_atomic(ID *id, int tag)
{
atomic_fetch_and_and_uint32((uint32_t *)&id->tag, ~tag);
atomic_fetch_and_and_int32(&id->tag, ~tag);
}

View File

@@ -327,14 +327,14 @@ void BKE_object_free_derived_caches(Object *ob)
Mesh *me = ob->data;
if (me && me->bb) {
atomic_fetch_and_or_uint32((uint *)&me->bb->flag, BOUNDBOX_DIRTY);
atomic_fetch_and_or_int32(&me->bb->flag, BOUNDBOX_DIRTY);
}
}
else if (ELEM(ob->type, OB_SURF, OB_CURVE, OB_FONT)) {
Curve *cu = ob->data;
if (cu && cu->bb) {
atomic_fetch_and_or_uint32((uint *)&cu->bb->flag, BOUNDBOX_DIRTY);
atomic_fetch_and_or_int32(&cu->bb->flag, BOUNDBOX_DIRTY);
}
}

View File

@@ -1008,9 +1008,7 @@ static void pbvh_update_normals_store_task_cb(void *userdata, const int n)
MVert *mvert = &bvh->verts[v];
/* mvert is shared between nodes, hence between threads. */
if (atomic_fetch_and_and_uint8(
(uint8_t *)&mvert->flag, (uint8_t)~ME_VERT_PBVH_UPDATE) & ME_VERT_PBVH_UPDATE)
{
if (atomic_fetch_and_and_char(&mvert->flag, (char)~ME_VERT_PBVH_UPDATE) & ME_VERT_PBVH_UPDATE) {
normalize_v3(vnors[v]);
normal_float_to_short_v3(mvert->no, vnors[v]);
}

View File

@@ -1003,8 +1003,7 @@ BLI_INLINE bool parallel_range_next_iter_get(
ParallelRangeState * __restrict state,
int * __restrict iter, int * __restrict count)
{
uint32_t uval = atomic_fetch_and_add_uint32((uint32_t *)(&state->iter), state->chunk_size);
int previter = *(int32_t *)&uval;
int previter = atomic_fetch_and_add_int32(&state->iter, state->chunk_size);
*iter = previter;
*count = max_ii(0, min_ii(state->chunk_size, state->stop - previter));
@@ -1124,7 +1123,7 @@ static void task_parallel_range_ex(
}
num_tasks = min_ii(num_tasks, (stop - start) / state.chunk_size);
atomic_fetch_and_add_uint32((uint32_t *)(&state.iter), 0);
atomic_fetch_and_add_int32(&state.iter, 0);
if (use_userdata_chunk) {
userdata_chunk_array = MALLOCA(userdata_chunk_size * num_tasks);