fix instance support when using SIMD
This commit is contained in:
@@ -39,7 +39,7 @@ struct SVBVHNode
|
||||
int nchilds;
|
||||
|
||||
//Array of bb, array of childs
|
||||
float *bb;
|
||||
float *child_bb;
|
||||
SVBVHNode **child;
|
||||
};
|
||||
|
||||
@@ -57,7 +57,7 @@ inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHN
|
||||
int i=0;
|
||||
while(i+4 <= node->nchilds)
|
||||
{
|
||||
int res = test_bb_group4( (__m128*) (node->bb+6*i), isec );
|
||||
int res = test_bb_group4( (__m128*) (node->child_bb+6*i), isec );
|
||||
RE_RC_COUNT(isec->raycounter->bb.test);
|
||||
RE_RC_COUNT(isec->raycounter->bb.test);
|
||||
RE_RC_COUNT(isec->raycounter->bb.test);
|
||||
@@ -72,7 +72,7 @@ inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHN
|
||||
}
|
||||
while(i < node->nchilds)
|
||||
{
|
||||
if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
|
||||
if(RE_rayobject_bb_intersect_test(isec, (const float*)node->child_bb+6*i))
|
||||
stack[stack_pos++] = node->child[i];
|
||||
i++;
|
||||
}
|
||||
@@ -81,12 +81,51 @@ inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHN
|
||||
{
|
||||
for(int i=0; i<node->nchilds; i++)
|
||||
{
|
||||
if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
|
||||
if(RE_rayobject_bb_intersect_test(isec, (const float*)node->child_bb+6*i))
|
||||
stack[stack_pos++] = node->child[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float *min, float *max)
|
||||
{
|
||||
if(is_leaf(node))
|
||||
{
|
||||
RE_rayobject_merge_bb( (RayObject*)node, min, max);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i=0;
|
||||
while(SVBVH_SIMD && i+4 <= node->nchilds)
|
||||
{
|
||||
float *res = node->child_bb + 6*i;
|
||||
for(int j=0; j<3; j++)
|
||||
{
|
||||
min[j] = MIN2(min[j], res[4*j+0]);
|
||||
min[j] = MIN2(min[j], res[4*j+1]);
|
||||
min[j] = MIN2(min[j], res[4*j+2]);
|
||||
min[j] = MIN2(min[j], res[4*j+3]);
|
||||
}
|
||||
for(int j=0; j<3; j++)
|
||||
{
|
||||
max[j] = MAX2(max[j], res[4*(j+3)+0]);
|
||||
max[j] = MAX2(max[j], res[4*(j+3)+1]);
|
||||
max[j] = MAX2(max[j], res[4*(j+3)+2]);
|
||||
max[j] = MAX2(max[j], res[4*(j+3)+3]);
|
||||
}
|
||||
|
||||
i += 4;
|
||||
}
|
||||
|
||||
for(; i<node->nchilds; i++)
|
||||
{
|
||||
DO_MIN(node->child_bb+6*i , min);
|
||||
DO_MAX(node->child_bb+3+6*i, max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct SVBVHTree
|
||||
{
|
||||
RayObject rayobj;
|
||||
@@ -131,7 +170,7 @@ struct Reorganize_SVBVH
|
||||
{
|
||||
SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode));
|
||||
node->nchilds = nchilds;
|
||||
node->bb = (float*)BLI_memarena_alloc(tree->node_arena, sizeof(float)*6*nchilds);
|
||||
node->child_bb = (float*)BLI_memarena_alloc(tree->node_arena, sizeof(float)*6*nchilds);
|
||||
node->child= (SVBVHNode**)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode*)*nchilds);
|
||||
|
||||
return node;
|
||||
@@ -148,8 +187,8 @@ struct Reorganize_SVBVH
|
||||
while(i+4 <= node->nchilds)
|
||||
{
|
||||
float vec_tmp[4*6];
|
||||
float *res = node->bb+6*i;
|
||||
std::copy( node->bb+6*i, node->bb+6*(i+4), vec_tmp);
|
||||
float *res = node->child_bb+6*i;
|
||||
std::copy( res, res+6*4, vec_tmp);
|
||||
|
||||
for(int j=0; j<6; j++)
|
||||
{
|
||||
@@ -167,18 +206,18 @@ struct Reorganize_SVBVH
|
||||
//memmoves could be memory aligned
|
||||
const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(1,0,1,0) );
|
||||
const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(1,0,1,0) );
|
||||
_mm_store_ps( node->bb+6*i+4*0, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ) );
|
||||
_mm_store_ps( node->bb+6*i+4*1, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ) );
|
||||
_mm_store_ps( node->child_bb+6*i+4*0, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ) );
|
||||
_mm_store_ps( node->child_bb+6*i+4*1, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ) );
|
||||
|
||||
const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(3,2,3,2) );
|
||||
const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(3,2,3,2) );
|
||||
_mm_store_ps( node->bb+6*i+4*2, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ) );
|
||||
_mm_store_ps( node->bb+6*i+4*3, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ) );
|
||||
_mm_store_ps( node->child_bb+6*i+4*2, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ) );
|
||||
_mm_store_ps( node->child_bb+6*i+4*3, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ) );
|
||||
|
||||
const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_loadu_ps(bb0+4), _mm_loadu_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) );
|
||||
const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_loadu_ps(bb2+4), _mm_loadu_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) );
|
||||
_mm_store_ps( node->bb+6*i+4*4, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ) );
|
||||
_mm_store_ps( node->bb+6*i+4*5, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ) );
|
||||
_mm_store_ps( node->child_bb+6*i+4*4, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ) );
|
||||
_mm_store_ps( node->child_bb+6*i+4*5, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ) );
|
||||
*/
|
||||
|
||||
i += 4;
|
||||
@@ -210,12 +249,12 @@ struct Reorganize_SVBVH
|
||||
float bb[6];
|
||||
INIT_MINMAX(bb, bb+3);
|
||||
RE_rayobject_merge_bb( (RayObject*)o_child, bb, bb+3);
|
||||
copy_bb(node->bb+i*6, bb);
|
||||
copy_bb(node->child_bb+i*6, bb);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
copy_bb(node->bb+i*6, o_child->bb);
|
||||
copy_bb(node->child_bb+i*6, o_child->bb);
|
||||
}
|
||||
}
|
||||
assert( i == 0 );
|
||||
|
||||
Reference in New Issue
Block a user