Use one global task scheduler for all the tasks

Instead of allocating scheduler and starting threads
on every object_update call make it so scheduler is
global and threads are always ready to run.

This way we can avoid a quite hacky thing, which is
counting how many objects need to be updated before
starting the threaded update.

It'll also allow using the same scheduler to do all
sorts of tasks, not only object updates. This is nice
from a load balancing point of view.

Couple of changes were needed to be done in task
scheduler itself:

- Free task before sending notifier.
- Free TaskThread structures outside of the thread.

This is needed to make it possible to use begin/end
threaded malloc from the main thread before/after
running the pool. Without this change it was possible
that allocation would switch to non-threaded mode while
a thread was still freeing its task.

This required storing TaskThread array in Scheduler,
but it's not so bad actually, since it also
reduces memory overhead caused by per-thread allocation.
This commit is contained in:
Sergey Sharybin
2013-07-31 21:55:36 +00:00
parent 853f2e05c8
commit c847198a7b
5 changed files with 47 additions and 73 deletions

View File

@@ -1179,30 +1179,12 @@ typedef struct ThreadedObjectUpdateState {
#ifdef ENABLE_THREAD_STATISTICS
ListBase statistics[64];
int tot_thread;
#endif
} ThreadedObjectUpdateState;
static void scene_update_object_add_task(void *node, void *user_data);
/* Serial (non-threaded) update of every base in the scene; used as the
 * fallback path when a threaded update is not worthwhile. */
static void scene_update_all_bases(Scene *scene, Scene *scene_parent)
{
Base *base;
for (base = scene->base.first; base; base = base->next) {
Object *object = base->object;
/* Update the object itself, then recalc its dupli-group if it has one. */
BKE_object_handle_update_ex(scene_parent, object, scene->rigidbody_world);
if (object->dup_group && (object->transflag & OB_DUPLIGROUP))
BKE_group_handle_recalc_and_update(scene_parent, object, object->dup_group);
/* always update layer, so that animating layers works (joshua july 2010) */
/* XXX commented out, this has depsgraph issues anyway - and this breaks setting scenes
 * (on scene-set, the base-lay is copied to ob-lay (ton nov 2012) */
// base->lay = ob->lay;
}
}
static void scene_update_object_func(TaskPool *pool, void *taskdata, int threadid)
{
#define PRINT if (G.debug & G_DEBUG) printf
@@ -1237,6 +1219,8 @@ static void scene_update_object_func(TaskPool *pool, void *taskdata, int threadi
if (G.debug & G_DEBUG) {
StatisicsEntry *entry;
state->tot_thread = max_ii(state->tot_thread, threadid + 1);
entry = MEM_mallocN(sizeof(StatisicsEntry), "update thread statistics");
entry->object = object;
entry->time = PIL_check_seconds_timer() - start_time;
@@ -1266,11 +1250,11 @@ static void scene_update_object_add_task(void *node, void *user_data)
}
#ifdef ENABLE_THREAD_STATISTICS
static void print_threads_statistics(ThreadedObjectUpdateState *state, int tot_thread)
static void print_threads_statistics(ThreadedObjectUpdateState *state)
{
int i;
for (i = 0; i < tot_thread; i++) {
for (i = 0; i < state->tot_thread; i++) {
int total_objects = 0;
double total_time = 0.0;
StatisicsEntry *entry;
@@ -1297,25 +1281,11 @@ static void print_threads_statistics(ThreadedObjectUpdateState *state, int tot_t
}
#endif
static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
static void scene_update_objects(Scene *scene, Scene *scene_parent)
{
TaskScheduler *task_scheduler;
TaskScheduler *task_scheduler = BLI_task_scheduler_get();
TaskPool *task_pool;
ThreadedObjectUpdateState state;
int tot_thread = BLI_system_thread_count();
if (tot_thread == 1) {
/* If only one thread is possible we don't bother self with
* task pool, which would be an overhead in cas e of single
* CPU core.
*/
scene_update_all_bases(scene, scene_parent);
return;
}
if (G.debug_value == 13666) {
tot_thread = 1;
}
/* Ensure malloc will go go fine from threads,
* this is needed because we could be in main thread here
@@ -1328,10 +1298,10 @@ static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
state.scene_parent = scene_parent;
#ifdef ENABLE_THREAD_STATISTICS
memset(state.statistics, 0, sizeof(state.statistics));
state.tot_thread = 0;
#endif
BLI_spin_init(&state.lock);
task_scheduler = BLI_task_scheduler_create(tot_thread);
task_pool = BLI_task_pool_create(task_scheduler, &state);
/* Initialize run-time data in the graph needed for traversing it
@@ -1361,7 +1331,6 @@ static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
/* free */
BLI_task_pool_free(task_pool);
BLI_task_scheduler_free(task_scheduler);
BLI_end_threaded_malloc();
@@ -1369,7 +1338,7 @@ static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
#ifdef ENABLE_THREAD_STATISTICS
if (G.debug & G_DEBUG) {
print_threads_statistics(&state, tot_thread);
print_threads_statistics(&state);
}
#endif
@@ -1409,30 +1378,6 @@ static void scene_update_objects_threaded(Scene *scene, Scene *scene_parent)
}
}
/* Entry point for per-object scene updates: counts how many objects are
 * tagged for recalculation and only takes the threaded path when more
 * than one object actually needs work; otherwise updates serially. */
static void scene_update_objects(Scene *scene, Scene *scene_parent)
{
Base *base;
int update_count = 0;
/* Optimization thing: don't do threads if no modifier
 * stack need to be evaluated.
 */
for (base = scene->base.first; base; base = base->next) {
Object *ob = base->object;
if (ob->recalc & OB_RECALC_ALL) {
update_count++;
}
}
/* Thread-pool setup has a cost; only pay it when at least two objects
 * are tagged, otherwise the serial path is cheaper. */
if (update_count > 1) {
scene_update_objects_threaded(scene, scene_parent);
}
else {
scene_update_all_bases(scene, scene_parent);
}
}
static void scene_update_tagged_recursive(Main *bmain, Scene *scene, Scene *scene_parent)
{
scene->customdata_mask = scene_parent->customdata_mask;

View File

@@ -45,11 +45,15 @@ extern "C" {
#define BLENDER_MAX_THREADS 64
struct ListBase;
struct TaskScheduler;
/* Threading API */
/*this is run once at startup*/
void BLI_threadapi_init(void);
void BLI_threadapi_exit(void);
struct TaskScheduler *BLI_task_scheduler_get(void);
void BLI_init_threads(struct ListBase *threadbase, void *(*do_thread)(void *), int tot);
int BLI_available_threads(struct ListBase *threadbase);

View File

@@ -53,6 +53,7 @@ struct TaskPool {
struct TaskScheduler {
pthread_t *threads;
struct TaskThread *task_threads;
int num_threads;
ListBase queue;
@@ -124,19 +125,19 @@ static void *task_scheduler_thread_run(void *thread_p)
/* keep popping off tasks */
while (task_scheduler_thread_wait_pop(scheduler, &task)) {
/* run task */
task->run(task->pool, task->taskdata, thread_id);
TaskPool *pool = task->pool;
/* notify pool task was done */
task_pool_num_decrease(task->pool, 1);
/* run task */
task->run(pool, task->taskdata, thread_id);
/* delete task */
if (task->free_taskdata)
MEM_freeN(task->taskdata);
MEM_freeN(task);
}
MEM_freeN(thread);
/* notify pool task was done */
task_pool_num_decrease(pool, 1);
}
return NULL;
}
@@ -167,9 +168,10 @@ TaskScheduler *BLI_task_scheduler_create(int num_threads)
scheduler->num_threads = num_threads;
scheduler->threads = MEM_callocN(sizeof(pthread_t) * num_threads, "TaskScheduler threads");
scheduler->task_threads = MEM_callocN(sizeof(TaskThread) * num_threads, "TaskScheduler task threads");
for (i = 0; i < num_threads; i++) {
TaskThread *thread = MEM_callocN(sizeof(TaskThread), "TaskThread");
TaskThread *thread = &scheduler->task_threads[i];
thread->scheduler = scheduler;
thread->id = i + 1;
@@ -205,6 +207,11 @@ void BLI_task_scheduler_free(TaskScheduler *scheduler)
MEM_freeN(scheduler->threads);
}
/* Delete task thread data */
if (scheduler->task_threads) {
MEM_freeN(scheduler->task_threads);
}
/* delete leftover tasks */
for (task = scheduler->queue.first; task; task = task->next) {
if (task->free_taskdata)

View File

@@ -37,6 +37,7 @@
#include "BLI_listbase.h"
#include "BLI_gsqueue.h"
#include "BLI_task.h"
#include "BLI_threads.h"
#include "PIL_time.h"
@@ -63,6 +64,9 @@ extern pthread_key_t gomp_tls_key;
static void *thread_tls_data;
#endif
/* We're using one global task scheduler for all kind of tasks. */
static TaskScheduler *task_scheduler = NULL;
/* ********** basic thread control API ************
*
* Many thread cases have an X amount of jobs, and only an Y amount of
@@ -144,7 +148,19 @@ static void BLI_unlock_malloc_thread(void)
/* One-time threading API initialization, run once at startup:
 * records the calling (main) thread and creates the single global
 * task scheduler shared by all task pools. */
void BLI_threadapi_init(void)
{
/* Number of usable hardware threads on this system. */
int tot_thread = BLI_system_thread_count();
/* Record the id of the thread performing initialization (the main thread). */
mainid = pthread_self();
/* Global scheduler; freed in BLI_threadapi_exit(). */
task_scheduler = BLI_task_scheduler_create(tot_thread);
}
/* Shutdown counterpart of BLI_threadapi_init(): tears down the global
 * task scheduler (and with it the worker threads it owns). */
void BLI_threadapi_exit(void)
{
BLI_task_scheduler_free(task_scheduler);
}
/* Accessor for the single global task scheduler; only valid between
 * BLI_threadapi_init() and BLI_threadapi_exit(). */
TaskScheduler *BLI_task_scheduler_get(void)
{
return task_scheduler;
}
/* tot = 0 only initializes malloc mutex in a safe way (see sequence.c)
@@ -871,4 +887,3 @@ void BLI_end_threaded_malloc(void)
if (thread_levels == 0)
MEM_set_lock_callback(NULL, NULL);
}

View File

@@ -51,6 +51,7 @@
#include "BLI_listbase.h"
#include "BLI_path_util.h"
#include "BLI_string.h"
#include "BLI_threads.h"
#include "BLI_utildefines.h"
#include "BKE_blender.h"
@@ -510,12 +511,14 @@ void WM_exit_ext(bContext *C, const short do_python)
GHOST_DisposeSystemPaths();
BLI_threadapi_exit();
if (MEM_get_memory_blocks_in_use() != 0) {
printf("Error: Not freed memory blocks: %d\n", MEM_get_memory_blocks_in_use());
MEM_printmemlist();
}
wm_autosave_delete();
printf("\nBlender quit\n");
#ifdef WIN32