Files
test/intern/cycles/kernel/svm/svm_attribute.h
Jeroen Bakker d6d306441f T61513: Refactored Cycles Attribute Retrieval
There are generic functions to retrieve float and float3 attributes:
`primitive_attribute_float` and `primitive_attribute_float3`. Inside
these functions a prioritised if-else construction checked where
the attribute is stored and then retrieved it from that location.

Actually the calling function most of the time already knows where
the data is stored. So we could simplify this by splitting these
functions and remove the check logic.

This patch splits the `primitive_attribute_float?` functions into
`primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`.
This leads to less branching and more optimal kernels.

The original function is still being used by OSL and `svm_node_attr`.

This will reduce the compilation time and render time for kernels.
Especially in production scenes there is a lot of benefit.

Impact in compilation times

    job  |   scene_name    | previous |  new  | percentage
  -------+-----------------+----------+-------+------------
  t61513 | empty           |    10.63 | 10.66 |          0%
  t61513 | bmw             |    17.91 | 17.65 |          1%
  t61513 | fishycat        |    19.57 | 17.68 |         10%
  t61513 | barbershop      |    54.10 | 24.41 |         55%
  t61513 | classroom       |    17.55 | 16.29 |          7%
  t61513 | koro            |    18.92 | 18.05 |          5%
  t61513 | pavillion       |    17.43 | 16.52 |          5%
  t61513 | splash279       |    16.48 | 14.91 |         10%
  t61513 | volume_emission |    36.22 | 21.60 |         40%

Impact in render times

    job  |   scene_name    | previous |  new   | percentage
  -------+-----------------+----------+--------+------------
  61513 | empty           |    21.06 |  20.35 |          3%
  61513 | bmw             |   198.44 | 190.05 |          4%
  61513 | fishycat        |   394.20 | 401.25 |         -2%
  61513 | barbershop      |  1188.16 | 912.39 |         23%
  61513 | classroom       |   341.08 | 340.38 |          0%
  61513 | koro            |   472.43 | 471.80 |          0%
  61513 | pavillion       |   905.77 | 899.80 |          1%
  61513 | splash279       |    55.26 |  54.86 |          1%
  61513 | volume_emission |    62.59 |  61.70 |          1%

There is also a positive impact when using CPU and CUDA, but it is small.

I didn't split the hair logic from the surface logic due to:

* Hair and surface use the same attribute types. It was not clear if they could
  be split when looking at the code only.
* Hair and surface are quick to compile and to read. So the benefit is quite
  small.

Differential Revision: https://developer.blender.org/D4375
2019-02-19 16:25:48 +01:00

147 lines
3.7 KiB
C

/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CCL_NAMESPACE_BEGIN
/* Attribute Node */
/* Decode an attribute node and resolve its descriptor.
 *
 * Writes node.z to *out_offset and node.w (cast to NodeAttributeType) to
 * *type. When the shader data refers to an object, the attribute identified
 * by node.y is looked up with find_attribute(). If there is no object
 * (background) or the lookup reports ATTR_STD_NOT_FOUND, the descriptor from
 * attribute_not_found() is returned instead, with its offset forced to 0 and
 * its type set to the requested output type. */
ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
                                                  uint4 node, NodeAttributeType *type,
                                                  uint *out_offset)
{
  const NodeAttributeType requested_type = (NodeAttributeType)node.w;

  *out_offset = node.z;
  *type = requested_type;

  if (sd->object != OBJECT_NONE) {
    AttributeDescriptor found = find_attribute(kg, sd, node.y);
    if (found.offset != ATTR_STD_NOT_FOUND) {
      return found;
    }
  }

  /* Shared fallback: no object (background) or attribute missing on the object. */
  AttributeDescriptor fallback = attribute_not_found();
  fallback.offset = 0;
  fallback.type = requested_type;
  return fallback;
}
/* Attribute node: fetch an attribute and write it to the SVM stack.
 *
 * The descriptor, requested output type and stack offset come from
 * svm_node_attr_init(). The stored type (desc.type) and the requested output
 * type may differ:
 *  - float  -> float3: the scalar is replicated into all three components.
 *  - float3 -> float:  the result of average() on the vector is stored. */
ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
  NodeAttributeType out_type;
  uint stack_offset;
  AttributeDescriptor attr = svm_node_attr_init(kg, sd, node, &out_type, &stack_offset);

  if (attr.type != NODE_ATTR_FLOAT) {
    /* Attribute is not stored as a float: read it as a float3. */
    float3 value3 = primitive_attribute_float3(kg, sd, attr, NULL, NULL);
    if (out_type != NODE_ATTR_FLOAT) {
      stack_store_float3(stack, stack_offset, value3);
    }
    else {
      stack_store_float(stack, stack_offset, average(value3));
    }
    return;
  }

  /* Attribute is stored as a single float. */
  float value = primitive_attribute_float(kg, sd, attr, NULL, NULL);
  if (out_type != NODE_ATTR_FLOAT) {
    stack_store_float3(stack, stack_offset, make_float3(value, value, value));
  }
  else {
    stack_store_float(stack, stack_offset, value);
  }
}
/* Attribute node, bump variant for dx.
 *
 * Same as the plain attribute node, except that the value written to the
 * stack is the attribute value plus the dx value that
 * primitive_surface_attribute_float[3]() returns through its fourth argument
 * (presumably the differential along x -- confirm against the helper).
 * Type conversion between float and float3 is applied to the offset value.
 *
 * Declared ccl_device_noinline when building with __KERNEL_CUDA__, plain
 * ccl_device otherwise. */
#ifdef __KERNEL_CUDA__
ccl_device_noinline
#else
ccl_device
#endif
void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
  NodeAttributeType out_type;
  uint stack_offset;
  AttributeDescriptor attr = svm_node_attr_init(kg, sd, node, &out_type, &stack_offset);

  if (attr.type != NODE_ATTR_FLOAT) {
    /* Attribute is not stored as a float: read it as a float3. */
    float3 delta3;
    float3 value3 = primitive_surface_attribute_float3(kg, sd, attr, &delta3, NULL);
    float3 shifted3 = value3 + delta3;
    if (out_type != NODE_ATTR_FLOAT) {
      stack_store_float3(stack, stack_offset, shifted3);
    }
    else {
      stack_store_float(stack, stack_offset, average(shifted3));
    }
    return;
  }

  /* Attribute is stored as a single float. */
  float delta;
  float value = primitive_surface_attribute_float(kg, sd, attr, &delta, NULL);
  float shifted = value + delta;
  if (out_type != NODE_ATTR_FLOAT) {
    stack_store_float3(stack, stack_offset, make_float3(shifted, shifted, shifted));
  }
  else {
    stack_store_float(stack, stack_offset, shifted);
  }
}
/* Attribute node, bump variant for dy.
 *
 * Same as the plain attribute node, except that the value written to the
 * stack is the attribute value plus the dy value that
 * primitive_surface_attribute_float[3]() returns through its fifth argument
 * (presumably the differential along y -- confirm against the helper).
 * Type conversion between float and float3 is applied to the offset value.
 *
 * Declared ccl_device_noinline when building with __KERNEL_CUDA__, plain
 * ccl_device otherwise. */
#ifdef __KERNEL_CUDA__
ccl_device_noinline
#else
ccl_device
#endif
void svm_node_attr_bump_dy(KernelGlobals *kg,
                           ShaderData *sd,
                           float *stack,
                           uint4 node)
{
  NodeAttributeType out_type;
  uint stack_offset;
  AttributeDescriptor attr = svm_node_attr_init(kg, sd, node, &out_type, &stack_offset);

  if (attr.type != NODE_ATTR_FLOAT) {
    /* Attribute is not stored as a float: read it as a float3. */
    float3 delta3;
    float3 value3 = primitive_surface_attribute_float3(kg, sd, attr, NULL, &delta3);
    float3 shifted3 = value3 + delta3;
    if (out_type != NODE_ATTR_FLOAT) {
      stack_store_float3(stack, stack_offset, shifted3);
    }
    else {
      stack_store_float(stack, stack_offset, average(shifted3));
    }
    return;
  }

  /* Attribute is stored as a single float. */
  float delta;
  float value = primitive_surface_attribute_float(kg, sd, attr, NULL, &delta);
  float shifted = value + delta;
  if (out_type != NODE_ATTR_FLOAT) {
    stack_store_float3(stack, stack_offset, make_float3(shifted, shifted, shifted));
  }
  else {
    stack_store_float(stack, stack_offset, shifted);
  }
}
CCL_NAMESPACE_END