DRW: Reuse DRWCallState for the same object.

This enables caching the matrices and reduces the redraw time of the same object, which is particularly important for eevee.
2018-02-28 16:23:33 +01:00
parent 1ba96857d1
commit 64e35f6fd2
4 changed files with 64 additions and 17 deletions
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -372,7 +372,7 @@ static void drw_viewport_var_init(void)
 			DST.vmempool->calls = BLI_mempool_create(MAX2(sizeof(DRWCall), sizeof(DRWCallGenerate)), 0, 512, 0);
 		}
 		if (DST.vmempool->states == NULL) {
-			DST.vmempool->states = BLI_mempool_create(sizeof(DRWCallState), 0, 512, 0);
+			DST.vmempool->states = BLI_mempool_create(sizeof(DRWCallState), 0, 512, BLI_MEMPOOL_ALLOW_ITER);
 		}
 		if (DST.vmempool->shgroups == NULL) {
 			DST.vmempool->shgroups = BLI_mempool_create(sizeof(DRWShadingGroup), 0, 256, 0);
@@ -438,6 +438,9 @@ static void drw_viewport_var_init(void)
 	}

 	DST.override_mat = 0;
+	DST.dirty_mat = false;
+	DST.state_cache_id = 1;
+
 	memset(DST.common_instance_data, 0x0, sizeof(DST.common_instance_data));
 }

@@ -453,12 +456,14 @@ void DRW_viewport_matrix_override_set(float mat[4][4], DRWViewportMatrixType typ
 {
 	copy_m4_m4(DST.view_data.mat[type], mat);
 	DST.override_mat |= (1 << type);
+	DST.dirty_mat = true;
 }

 void DRW_viewport_matrix_override_unset(DRWViewportMatrixType type)
 {
 	copy_m4_m4(DST.view_data.mat[type], DST.original_mat[type]);
 	DST.override_mat &= ~(1 << type);
+	DST.dirty_mat = true;
 }

 bool DRW_viewport_is_persp_get(void)
@@ -628,6 +633,8 @@ static void drw_engines_cache_init(void)

 static void drw_engines_cache_populate(Object *ob)
 {
+	DST.ob_state = NULL;
+
 	for (LinkData *link = DST.enabled_engines.first; link; link = link->next) {
 		DrawEngineType *engine = link->data;
 		ViewportEngineData *data = drw_viewport_engine_data_ensure(engine);
@@ -1324,6 +1331,7 @@ void DRW_render_object_iter(
 {
 	DEG_OBJECT_ITER_FOR_RENDER_ENGINE(depsgraph, ob, DRW_iterator_mode_get())
 	{
+		DST.ob_state = NULL;
 		callback(vedata, ob, engine, depsgraph);
 	}
 	DEG_OBJECT_ITER_FOR_RENDER_ENGINE_END
--- a/source/blender/draw/intern/draw_manager.h
+++ b/source/blender/draw/intern/draw_manager.h
@@ -93,7 +93,8 @@ typedef struct DRWCallHeader {

 typedef struct DRWCallState {
 	unsigned char flag;
-	uint16_t matflag;
+	unsigned char cache_id;   /* Compared with DST.state_cache_id to see if matrices are still valid. */
+	uint16_t matflag;         /* Which matrices to compute. */
 	/* Culling: Using Bounding Sphere for now for faster culling.
 	 * Not ideal for planes. */
 	struct {
@@ -254,12 +255,11 @@ typedef struct DRWManager {
 	/* TODO clean up this struct a bit */
 	/* Cache generation */
 	ViewportMemoryPool *vmempool;
-	DRWUniform *last_uniform;
-	DRWCall *last_call;
-	DRWCallGenerate *last_callgenerate;
-	DRWShadingGroup *last_shgroup;
 	DRWInstanceDataList *idatalist;
 	DRWInstanceData *common_instance_data[MAX_INSTANCE_DATA_SIZE];
+	/* State of the object being evaluated if already allocated. */
+	DRWCallState *ob_state;
+	unsigned char state_cache_id; /* Could be larger but 254 view changes is already a lot! */

 	/* Rendering state */
 	GPUShader *shader;
@@ -300,8 +300,9 @@ typedef struct DRWManager {

 	/* View dependant uniforms. */
 	float original_mat[6][4][4]; /* Original rv3d matrices. */
-	int override_mat;           /* Bitflag of which matrices are overriden. */
+	int override_mat;            /* Bitflag of which matrices are overriden. */
 	int num_clip_planes;         /* Number of active clipplanes. */
+	bool dirty_mat;

 	struct {
 		float mat[6][4][4];
--- a/source/blender/draw/intern/draw_manager_data.c
+++ b/source/blender/draw/intern/draw_manager_data.c
@@ -246,9 +246,11 @@ static void drw_call_calc_orco(ID *ob_data, float (*r_orcofacs)[3])
 	}
 }

-static void drw_call_state_init(DRWCallState *state, DRWShadingGroup *shgroup, float (*obmat)[4], ID *ob_data)
+static DRWCallState *drw_call_state_create(DRWShadingGroup *shgroup, float (*obmat)[4], ID *ob_data)
 {
+	DRWCallState *state = BLI_mempool_alloc(DST.vmempool->states);
 	state->flag = 0;
+	state->cache_id = 0;
 	state->matflag = shgroup->matflag;

 	/* TODO Set culling bsphere IF needed by the DRWPass */
@@ -266,11 +268,22 @@ static void drw_call_state_init(DRWCallState *state, DRWShadingGroup *shgroup, f
 		unit_m4(state->model);
 	}

-	/* Orco factors */
+	/* Orco factors: We compute this at creation to not have to save the *ob_data */
 	if ((state->matflag & DRW_CALL_ORCOTEXFAC) != 0) {
 		drw_call_calc_orco(ob_data, state->orcotexfac);
 		state->matflag &= ~DRW_CALL_ORCOTEXFAC;
 	}
+
+	return state;
+}
+
+static DRWCallState *drw_call_state_object(DRWShadingGroup *shgroup, float (*obmat)[4], ID *ob_data)
+{
+	if (DST.ob_state == NULL) {
+		DST.ob_state = drw_call_state_create(shgroup, obmat, ob_data);
+	}
+
+	return DST.ob_state;
 }

 void DRW_shgroup_call_add(DRWShadingGroup *shgroup, Gwn_Batch *geom, float (*obmat)[4])
@@ -279,13 +292,13 @@ void DRW_shgroup_call_add(DRWShadingGroup *shgroup, Gwn_Batch *geom, float (*obm
 	BLI_assert(shgroup->type == DRW_SHG_NORMAL);

 	DRWCall *call = BLI_mempool_alloc(DST.vmempool->calls);
-	call->state = BLI_mempool_alloc(DST.vmempool->states);
+	call->state = drw_call_state_create(shgroup, obmat, NULL);
 	call->head.type = DRW_CALL_SINGLE;
 #ifdef USE_GPU_SELECT
 	call->head.select_id = DST.select_id;
 #endif
 	call->geometry = geom;
-	drw_call_state_init(call->state, shgroup, obmat, NULL);
+
 	BLI_LINKS_APPEND(&shgroup->calls, (DRWCallHeader *)call);
 }

@@ -296,13 +309,13 @@ void DRW_shgroup_call_object_add(DRWShadingGroup *shgroup, Gwn_Batch *geom, Obje
 	BLI_assert(shgroup->type == DRW_SHG_NORMAL);

 	DRWCall *call = BLI_mempool_alloc(DST.vmempool->calls);
-	call->state = BLI_mempool_alloc(DST.vmempool->states);
+	call->state = drw_call_state_object(shgroup, ob->obmat, ob->data);
 	call->head.type = DRW_CALL_SINGLE;
 #ifdef USE_GPU_SELECT
 	call->head.select_id = DST.select_id;
 #endif
 	call->geometry = geom;
-	drw_call_state_init(call->state, shgroup, ob->obmat, ob->data);
+
 	BLI_LINKS_APPEND(&shgroup->calls, (DRWCallHeader *)call);
 }

@@ -315,14 +328,14 @@ void DRW_shgroup_call_generate_add(
 	BLI_assert(shgroup->type == DRW_SHG_NORMAL);

 	DRWCallGenerate *call = BLI_mempool_alloc(DST.vmempool->calls);
-	call->state = BLI_mempool_alloc(DST.vmempool->states);
+	call->state = drw_call_state_create(shgroup, obmat, NULL);
 	call->head.type = DRW_CALL_GENERATE;
 #ifdef USE_GPU_SELECT
 	call->head.select_id = DST.select_id;
 #endif
 	call->geometry_fn = geometry_fn;
 	call->user_data = user_data;
-	drw_call_state_init(call->state, shgroup, obmat, NULL);
+
 	BLI_LINKS_APPEND(&shgroup->calls, (DRWCallHeader *)call);
 }

--- a/source/blender/draw/intern/draw_manager_exec.c
+++ b/source/blender/draw/intern/draw_manager_exec.c
@@ -25,6 +25,8 @@

 #include "draw_manager.h"

+#include "BLI_mempool.h"
+
 #include "BIF_glutil.h"

 #include "BKE_global.h"
@@ -367,7 +369,13 @@ void DRW_state_clip_planes_reset(void)

 static void draw_matrices_model_prepare(DRWCallState *st)
 {
-	/* OPTI : We can optimize further by sharing this computation for each call using the same object. */
+	if (st->cache_id == DST.state_cache_id) {
+		return; /* Values are already updated for this view. */
+	}
+	else {
+		st->cache_id = DST.state_cache_id;
+	}
+
 	/* Order matters */
 	if (st->matflag & (DRW_CALL_MODELVIEW | DRW_CALL_MODELVIEWINVERSE |
 	                  DRW_CALL_NORMALVIEW | DRW_CALL_EYEVEC))
@@ -700,9 +708,26 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)

 static void drw_draw_pass_ex(DRWPass *pass, DRWShadingGroup *start_group, DRWShadingGroup *end_group)
 {
-	/* Start fresh */
 	DST.shader = NULL;

+	if (DST.dirty_mat) {
+		DST.state_cache_id++;
+		DST.dirty_mat = false;
+		/* Catch integer wrap around. */
+		if (UNLIKELY(DST.state_cache_id == 0)) {
+			DST.state_cache_id = 1;
+			/* We must reset all CallStates to ensure that not
+			 * a single one stayed with cache_id equal to 1. */
+			BLI_mempool_iter iter;
+			DRWCallState *state;
+			BLI_mempool_iternew(DST.vmempool->states, &iter);
+			while ((state = BLI_mempool_iterstep(&iter))) {
+				state->cache_id = 0;
+			}
+		}
+		/* TODO dispatch threads to compute matrices/culling */
+	}
+
 	BLI_assert(DST.buffer_finish_called && "DRW_render_instance_buffer_finish had not been called before drawing");

 	drw_state_set(pass->state);