Use atomic operations instead of spin lock for threaded update
This replaces code (pseudo-code): spin_lock(); update_child_dag_nodes(); schedule_new_nodes(); spin_unlock(); with: update_child_dag_nodes_with_atomic_ops(); schedule_new_nodes(); The reason for this is that scheduling new nodes implies mutex lock, and having spin around it is a bad idea. Alternatives could have been to use spinlock around child nodes update only, but that would either imply having either per-node spin-lock or using array to put nodes ready for update to an array. Didn't like an alternatives, using atomic operations makes code much easier to follow, keeps data-flow on cpu nice. Same atomic ops might be used in other performance-critical areas later. Using atomic ops implementation from jemalloc project.
This commit is contained in:
291
intern/atomic/atomic_ops.h
Normal file
291
intern/atomic/atomic_ops.h
Normal file
@@ -0,0 +1,291 @@
|
||||
/*
|
||||
* Adopted from jemalloc with this license:
|
||||
*
|
||||
* Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
|
||||
* All rights reserved.
|
||||
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
||||
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
||||
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* 1. Redistributions of source code must retain the above copyright notice(s),
|
||||
* this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
* EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ATOMIC_OPS_H__
|
||||
#define ATOMIC_OPS_H__
|
||||
|
||||
/* needed for int types */
|
||||
#include "../../source/blender/blenlib/BLI_sys_types.h"
|
||||
|
||||
/* little macro so inline keyword works */
|
||||
#if defined(_MSC_VER)
|
||||
# define ATOMIC_INLINE static __forceinline
|
||||
#else
|
||||
# if (defined(__APPLE__) && defined(__ppc__))
|
||||
/* static inline __attribute__ here breaks osx ppc gcc42 build */
|
||||
# define ATOMIC_INLINE static __attribute__((always_inline))
|
||||
# else
|
||||
# define ATOMIC_INLINE static inline __attribute__((always_inline))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__)
|
||||
# define LG_SIZEOF_PTR 3
|
||||
# define LG_SIZEOF_INT 3
|
||||
#else
|
||||
# define LG_SIZEOF_PTR 2
|
||||
# define LG_SIZEOF_INT 2
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/* 64-bit operations. */
|
||||
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
|
||||
# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
return (__sync_add_and_fetch(p, x));
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
return (__sync_sub_and_fetch(p, x));
|
||||
}
|
||||
#elif (defined(_MSC_VER))
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
return (InterlockedExchangeAdd64(p, x));
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
return (InterlockedExchangeAdd64(p, -((int64_t)x)));
|
||||
}
|
||||
#elif (defined(JEMALLOC_OSATOMIC))
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
|
||||
}
|
||||
# elif (defined(__amd64__) || defined(__x86_64__))
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
asm volatile (
|
||||
"lock; xaddq %0, %1;"
|
||||
: "+r" (x), "=m" (*p) /* Outputs. */
|
||||
: "m" (*p) /* Inputs. */
|
||||
);
|
||||
return (x);
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
x = (uint64_t)(-(int64_t)x);
|
||||
asm volatile (
|
||||
"lock; xaddq %0, %1;"
|
||||
: "+r" (x), "=m" (*p) /* Outputs. */
|
||||
: "m" (*p) /* Inputs. */
|
||||
);
|
||||
return (x);
|
||||
}
|
||||
# elif (defined(JEMALLOC_ATOMIC9))
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
/*
|
||||
* atomic_fetchadd_64() doesn't exist, but we only ever use this
|
||||
* function on LP64 systems, so atomic_fetchadd_long() will do.
|
||||
*/
|
||||
assert(sizeof(uint64_t) == sizeof(unsigned long));
|
||||
|
||||
return (atomic_fetchadd_long(p, (unsigned long)x) + x);
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
assert(sizeof(uint64_t) == sizeof(unsigned long));
|
||||
|
||||
return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
|
||||
}
|
||||
# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
return (__sync_add_and_fetch(p, x));
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint64_t
|
||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
||||
{
|
||||
return (__sync_sub_and_fetch(p, x));
|
||||
}
|
||||
# else
|
||||
# error "Missing implementation for 64-bit atomic operations"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/* 32-bit operations. */
|
||||
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (__sync_add_and_fetch(p, x));
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (__sync_sub_and_fetch(p, x));
|
||||
}
|
||||
#elif (defined(_MSC_VER))
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (InterlockedExchangeAdd(p, x));
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (InterlockedExchangeAdd(p, -((int32_t)x)));
|
||||
}
|
||||
#elif (defined(JEMALLOC_OSATOMIC))
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
|
||||
}
|
||||
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
asm volatile (
|
||||
"lock; xaddl %0, %1;"
|
||||
: "+r" (x), "=m" (*p) /* Outputs. */
|
||||
: "m" (*p) /* Inputs. */
|
||||
);
|
||||
return (x);
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
x = (uint32_t)(-(int32_t)x);
|
||||
asm volatile (
|
||||
"lock; xaddl %0, %1;"
|
||||
: "+r" (x), "=m" (*p) /* Outputs. */
|
||||
: "m" (*p) /* Inputs. */
|
||||
);
|
||||
return (x);
|
||||
}
|
||||
#elif (defined(JEMALLOC_ATOMIC9))
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (atomic_fetchadd_32(p, x) + x);
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
|
||||
}
|
||||
#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (__sync_add_and_fetch(p, x));
|
||||
}
|
||||
|
||||
ATOMIC_INLINE uint32_t
|
||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
||||
{
|
||||
return (__sync_sub_and_fetch(p, x));
|
||||
}
|
||||
#else
|
||||
# error "Missing implementation for 32-bit atomic operations"
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/* size_t operations. */
|
||||
ATOMIC_INLINE size_t
|
||||
atomic_add_z(size_t *p, size_t x)
|
||||
{
|
||||
#if (LG_SIZEOF_PTR == 3)
|
||||
return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
|
||||
#elif (LG_SIZEOF_PTR == 2)
|
||||
return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE size_t
|
||||
atomic_sub_z(size_t *p, size_t x)
|
||||
{
|
||||
#if (LG_SIZEOF_PTR == 3)
|
||||
return ((size_t)atomic_add_uint64((uint64_t *)p,
|
||||
(uint64_t)-((int64_t)x)));
|
||||
#elif (LG_SIZEOF_PTR == 2)
|
||||
return ((size_t)atomic_add_uint32((uint32_t *)p,
|
||||
(uint32_t)-((int32_t)x)));
|
||||
#endif
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* unsigned operations. */
|
||||
ATOMIC_INLINE unsigned
|
||||
atomic_add_u(unsigned *p, unsigned x)
|
||||
{
|
||||
#if (LG_SIZEOF_INT == 3)
|
||||
return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
|
||||
#elif (LG_SIZEOF_INT == 2)
|
||||
return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
|
||||
#endif
|
||||
}
|
||||
|
||||
ATOMIC_INLINE unsigned
|
||||
atomic_sub_u(unsigned *p, unsigned x)
|
||||
{
|
||||
#if (LG_SIZEOF_INT == 3)
|
||||
return ((unsigned)atomic_add_uint64((uint64_t *)p,
|
||||
(uint64_t)-((int64_t)x)));
|
||||
#elif (LG_SIZEOF_INT == 2)
|
||||
return ((unsigned)atomic_add_uint32((uint32_t *)p,
|
||||
(uint32_t)-((int32_t)x)));
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* ATOMIC_OPS_H__ */
|
||||
@@ -45,6 +45,7 @@ set(INC
|
||||
../../../intern/raskter
|
||||
../../../intern/smoke/extern
|
||||
../../../extern/libmv
|
||||
../../../intern/atomic
|
||||
|
||||
# XXX - BAD LEVEL CALL WM_api.h
|
||||
../windowmanager
|
||||
|
||||
@@ -52,6 +52,7 @@ incs = [
|
||||
'#/intern/iksolver/extern',
|
||||
'#/intern/opennl/extern',
|
||||
'#/intern/smoke/extern',
|
||||
'#/intern/atomic',
|
||||
'../avi',
|
||||
'../blenfont',
|
||||
'../blenlib',
|
||||
|
||||
@@ -94,11 +94,11 @@ typedef struct DagNode {
|
||||
struct DagNode *next;
|
||||
|
||||
/* Threaded evaluation routines */
|
||||
int valency; /* valency of the node is a number of parents which are not updated yet
|
||||
* this node has got.
|
||||
* Used by threaded update for faster detect whether node could be
|
||||
* updated aready.
|
||||
*/
|
||||
uint32_t valency; /* valency of the node is a number of parents which are not updated yet
|
||||
* this node has got.
|
||||
* Used by threaded update for faster detect whether node could be
|
||||
* updated aready.
|
||||
*/
|
||||
} DagNode;
|
||||
|
||||
typedef struct DagNodeQueueElem {
|
||||
|
||||
@@ -78,6 +78,8 @@
|
||||
#include "BKE_screen.h"
|
||||
#include "BKE_tracking.h"
|
||||
|
||||
#include "atomic_ops.h"
|
||||
|
||||
#include "depsgraph_private.h"
|
||||
|
||||
/* Queue and stack operations for dag traversal
|
||||
@@ -2737,7 +2739,7 @@ void DAG_threaded_update_handle_node_updated(void *node_v,
|
||||
|
||||
for (itA = node->child; itA; itA = itA->next) {
|
||||
if (itA->node != node) {
|
||||
itA->node->valency--;
|
||||
atomic_sub_uint32(&itA->node->valency, 1);
|
||||
|
||||
if (itA->node->valency == 0) {
|
||||
func(itA->node, user_data);
|
||||
|
||||
@@ -1171,7 +1171,6 @@ typedef struct StatisicsEntry {
|
||||
typedef struct ThreadedObjectUpdateState {
|
||||
Scene *scene;
|
||||
Scene *scene_parent;
|
||||
SpinLock lock;
|
||||
|
||||
/* Execution statistics */
|
||||
ListBase statistics[BLENDER_MAX_THREADS];
|
||||
@@ -1245,10 +1244,8 @@ static void scene_update_object_func(TaskPool *pool, void *taskdata, int threadi
|
||||
DAG_threaded_update_get_node_name(node));
|
||||
}
|
||||
|
||||
BLI_spin_lock(&state->lock);
|
||||
/* Update will decrease child's valency and schedule child with zero valency. */
|
||||
DAG_threaded_update_handle_node_updated(node,scene_update_object_add_task, pool);
|
||||
BLI_spin_unlock(&state->lock);
|
||||
|
||||
#undef PRINT
|
||||
}
|
||||
@@ -1321,7 +1318,6 @@ static void scene_update_objects(Scene *scene, Scene *scene_parent, bool use_thr
|
||||
state.scene_parent = scene_parent;
|
||||
memset(state.statistics, 0, sizeof(state.statistics));
|
||||
state.has_updated_objects = false;
|
||||
BLI_spin_init(&state.lock);
|
||||
|
||||
task_pool = BLI_task_pool_create(task_scheduler, &state);
|
||||
|
||||
@@ -1343,9 +1339,7 @@ static void scene_update_objects(Scene *scene, Scene *scene_parent, bool use_thr
|
||||
* to run into situations when the same task is adding twice to the
|
||||
* queue due to non-safe nature of function below.
|
||||
*/
|
||||
BLI_spin_lock(&state.lock);
|
||||
DAG_threaded_update_foreach_ready_node(scene, scene_update_object_add_task, task_pool);
|
||||
BLI_spin_unlock(&state.lock);
|
||||
|
||||
/* work and wait until tasks are done */
|
||||
BLI_task_pool_work_and_wait(task_pool);
|
||||
@@ -1355,8 +1349,6 @@ static void scene_update_objects(Scene *scene, Scene *scene_parent, bool use_thr
|
||||
|
||||
BLI_end_threaded_malloc();
|
||||
|
||||
BLI_spin_end(&state.lock);
|
||||
|
||||
if (G.debug & G_DEBUG) {
|
||||
print_threads_statistics(&state);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user