Use one global task scheduler for all the tasks

Instead of allocating scheduler and starting threads
on every object_update call make it so scheduler is
global and threads are always ready to run.

This way we can avoid a quite hacky thing, which is
counting how many objects need to be updated before
starting the threaded update.

It'll also allow using the same scheduler to do all
sorts of tasks, not only object updates. This is nice
from a load balancing point of view.

Couple of changes were needed to be done in task
scheduler itself:

- Free task before sending notifier.
- Free TaskThread structures outside of the thread.

This is needed to make it possible to use begin/end
threaded malloc from the main thread before/after
running the pool. Without this change it was possible
that allocation would switch to non-threaded mode while
a thread was still freeing its task.

This required storing TaskThread array in Scheduler,
but it's not so bad actually, since it also
reduces memory overhead caused by per-thread allocation.
This commit is contained in:
Sergey Sharybin
2013-07-31 21:55:36 +00:00
parent 853f2e05c8
commit c847198a7b
5 changed files with 47 additions and 73 deletions

View File

@@ -1179,30 +1179,12 @@ typedef struct ThreadedObjectUpdateState {
#ifdef ENABLE_THREAD_STATISTICS
ListBase statistics[64];
int tot_thread;
#endif
} ThreadedObjectUpdateState;
static void scene_update_object_add_task(void *node, void *user_data);
/* Serial (non-threaded) update of every base in the scene; used as the
 * fallback path when a threaded update is not worthwhile. */
static void scene_update_all_bases(Scene *scene, Scene *scene_parent)
{
Base *base;
for (base = scene->base.first; base; base = base->next) {
Object *object = base->object;
/* Update the object itself, then recalc its dupli-group if it has one. */
BKE_object_handle_update_ex(scene_parent, object, scene->rigidbody_world);
if (object->dup_group && (object->transflag & OB_DUPLIGROUP))
BKE_group_handle_recalc_and_update(scene_parent, object, object->dup_group);
/* always update layer, so that animating layers works (joshua july 2010) */
/* XXX commented out, this has depsgraph issues anyway - and this breaks setting scenes
 * (on scene-set, the base-lay is copied to ob-lay (ton nov 2012) */
// base->lay = ob->lay;
}
}
static void scene_update_object_func(TaskPool *pool, void *taskdata, int threadid)
{
#define PRINT if (G.debug & G_DEBUG) printf
@@ -1237,6 +1219,8 @@ static void scene_update_object_func(TaskPool *pool, void *taskdata, int threadi
if (G.debug & G_DEBUG) {
StatisicsEntry *entry;
state->tot_thread = max_ii(state->tot_thread, threadid + 1);
entry = MEM_mallocN(sizeof(StatisicsEntry), "update thread statistics");
entry->object = object;
entry->time = PIL_check_seconds_timer() - start_time;
@@ -1266,11 +1250,11 @@ static void scene_update_object_add_task(void *node, void *user_data)
}
#ifdef ENABLE_THREAD_STATISTICS
static void print_threads_statistics(ThreadedObjectUpdateState *state, int tot_thread)
static void print_threads_statistics(ThreadedObjectUpdateState *state)
{
int i;
for (i = 0; i < tot_thread; i++) {
for (i = 0; i < state->tot_thread; i++) {
int total_objects = 0;
double total_time = 0.0;
StatisicsEntry *entry;
@@ -1297,25 +1281,11 @@ static void print_threads_statistics(ThreadedObjectUpdateState *state, int tot_t
}
#endif
static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
static void scene_update_objects(Scene *scene, Scene *scene_parent)
{
TaskScheduler *task_scheduler;
TaskScheduler *task_scheduler = BLI_task_scheduler_get();
TaskPool *task_pool;
ThreadedObjectUpdateState state;
int tot_thread = BLI_system_thread_count();
if (tot_thread == 1) {
/* If only one thread is possible we don't bother self with
* task pool, which would be an overhead in cas e of single
* CPU core.
*/
scene_update_all_bases(scene, scene_parent);
return;
}
if (G.debug_value == 13666) {
tot_thread = 1;
}
/* Ensure malloc will go go fine from threads,
* this is needed because we could be in main thread here
@@ -1328,10 +1298,10 @@ static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
state.scene_parent = scene_parent;
#ifdef ENABLE_THREAD_STATISTICS
memset(state.statistics, 0, sizeof(state.statistics));
state.tot_thread = 0;
#endif
BLI_spin_init(&state.lock);
task_scheduler = BLI_task_scheduler_create(tot_thread);
task_pool = BLI_task_pool_create(task_scheduler, &state);
/* Initialize run-time data in the graph needed for traversing it
@@ -1361,7 +1331,6 @@ static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
/* free */
BLI_task_pool_free(task_pool);
BLI_task_scheduler_free(task_scheduler);
BLI_end_threaded_malloc();
@@ -1369,7 +1338,7 @@ static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
#ifdef ENABLE_THREAD_STATISTICS
if (G.debug & G_DEBUG) {
print_threads_statistics(&state, tot_thread);
print_threads_statistics(&state);
}
#endif
@@ -1409,30 +1378,6 @@ static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
}
}
/* Entry point for per-object scene updates: counts how many objects are
 * tagged for recalculation and only takes the threaded path when more
 * than one object actually needs work; otherwise updates serially. */
static void scene_update_objects(Scene *scene, Scene *scene_parent)
{
Base *base;
int update_count = 0;
/* Optimization thing: don't do threads if no modifier
 * stack need to be evaluated.
 */
for (base = scene->base.first; base; base = base->next) {
Object *ob = base->object;
if (ob->recalc & OB_RECALC_ALL) {
update_count++;
}
}
/* Thread-pool setup has a cost; only pay it when at least two objects
 * are tagged, otherwise the serial path is cheaper. */
if (update_count > 1) {
scene_update_objects_threaded(scene, scene_parent);
}
else {
scene_update_all_bases(scene, scene_parent);
}
}
static void scene_update_tagged_recursive(Main *bmain, Scene *scene, Scene *scene_parent)
{
scene->customdata_mask = scene_parent->customdata_mask;

View File

@@ -45,11 +45,15 @@ extern "C" {
#define BLENDER_MAX_THREADS 64
struct ListBase;
struct TaskScheduler;
/* Threading API */
/*this is run once at startup*/
void BLI_threadapi_init(void);
void BLI_threadapi_exit(void);
struct TaskScheduler *BLI_task_scheduler_get(void);
void BLI_init_threads(struct ListBase *threadbase, void *(*do_thread)(void *), int tot);
int BLI_available_threads(struct ListBase *threadbase);

View File

@@ -53,6 +53,7 @@ struct TaskPool {
struct TaskScheduler {
pthread_t *threads;
struct TaskThread *task_threads;
int num_threads;
ListBase queue;
@@ -124,19 +125,19 @@ static void *task_scheduler_thread_run(void *thread_p)
/* keep popping off tasks */
while (task_scheduler_thread_wait_pop(scheduler, &task)) {
/* run task */
task->run(task->pool, task->taskdata, thread_id);
TaskPool *pool = task->pool;
/* notify pool task was done */
task_pool_num_decrease(task->pool, 1);
/* run task */
task->run(pool, task->taskdata, thread_id);
/* delete task */
if (task->free_taskdata)
MEM_freeN(task->taskdata);
MEM_freeN(task);
}
MEM_freeN(thread);
/* notify pool task was done */
task_pool_num_decrease(pool, 1);
}
return NULL;
}
@@ -167,9 +168,10 @@ TaskScheduler *BLI_task_scheduler_create(int num_threads)
scheduler->num_threads = num_threads;
scheduler->threads = MEM_callocN(sizeof(pthread_t) * num_threads, "TaskScheduler threads");
scheduler->task_threads = MEM_callocN(sizeof(TaskThread) * num_threads, "TaskScheduler task threads");
for (i = 0; i < num_threads; i++) {
TaskThread *thread = MEM_callocN(sizeof(TaskThread), "TaskThread");
TaskThread *thread = &scheduler->task_threads[i];
thread->scheduler = scheduler;
thread->id = i + 1;
@@ -205,6 +207,11 @@ void BLI_task_scheduler_free(TaskScheduler *scheduler)
MEM_freeN(scheduler->threads);
}
/* Delete task thread data */
if (scheduler->task_threads) {
MEM_freeN(scheduler->task_threads);
}
/* delete leftover tasks */
for (task = scheduler->queue.first; task; task = task->next) {
if (task->free_taskdata)

View File

@@ -37,6 +37,7 @@
#include "BLI_listbase.h"
#include "BLI_gsqueue.h"
#include "BLI_task.h"
#include "BLI_threads.h"
#include "PIL_time.h"
@@ -63,6 +64,9 @@ extern pthread_key_t gomp_tls_key;
static void *thread_tls_data;
#endif
/* We're using one global task scheduler for all kind of tasks. */
static TaskScheduler *task_scheduler = NULL;
/* ********** basic thread control API ************
*
* Many thread cases have an X amount of jobs, and only an Y amount of
@@ -144,7 +148,19 @@ static void BLI_unlock_malloc_thread(void)
/* One-time threading API initialization, run once at startup:
 * records the calling (main) thread and creates the single global
 * task scheduler shared by all task pools. */
void BLI_threadapi_init(void)
{
/* Number of usable hardware threads on this system. */
int tot_thread = BLI_system_thread_count();
/* Record the id of the thread performing initialization (the main thread). */
mainid = pthread_self();
/* Global scheduler; freed in BLI_threadapi_exit(). */
task_scheduler = BLI_task_scheduler_create(tot_thread);
}
/* Shutdown counterpart of BLI_threadapi_init(): tears down the global
 * task scheduler (and with it the worker threads it owns). */
void BLI_threadapi_exit(void)
{
BLI_task_scheduler_free(task_scheduler);
}
/* Accessor for the single global task scheduler; only valid between
 * BLI_threadapi_init() and BLI_threadapi_exit(). */
TaskScheduler *BLI_task_scheduler_get(void)
{
return task_scheduler;
}
/* tot = 0 only initializes malloc mutex in a safe way (see sequence.c)
@@ -871,4 +887,3 @@ void BLI_end_threaded_malloc(void)
if (thread_levels == 0)
MEM_set_lock_callback(NULL, NULL);
}

View File

@@ -51,6 +51,7 @@
#include "BLI_listbase.h"
#include "BLI_path_util.h"
#include "BLI_string.h"
#include "BLI_threads.h"
#include "BLI_utildefines.h"
#include "BKE_blender.h"
@@ -510,12 +511,14 @@ void WM_exit_ext(bContext *C, const short do_python)
GHOST_DisposeSystemPaths();
BLI_threadapi_exit();
if (MEM_get_memory_blocks_in_use() != 0) {
printf("Error: Not freed memory blocks: %d\n", MEM_get_memory_blocks_in_use());
MEM_printmemlist();
}
wm_autosave_delete();
printf("\nBlender quit\n");
#ifdef WIN32