Subdiv: Split eval shaders
Both eval shaders (stencil and patch evaluation) were implemented in a single file, osd_kernel_comp.glsl. This PR separates them into dedicated shader files so each one is easier to understand. Pull Request: https://projects.blender.org/blender/blender/pulls/135719
@@ -91,7 +91,8 @@ if(WITH_OPENSUBDIV)
endif()

set(GLSL_SRC
  internal/evaluator/shaders/osd_kernel_comp.glsl
  internal/evaluator/shaders/osd_eval_stencils_comp.glsl
  internal/evaluator/shaders/osd_eval_patches_comp.glsl
)

set(GLSL_C)
@@ -135,113 +135,6 @@ GPUComputeEvaluator::~GPUComputeEvaluator()
  }
}

static GPUShader *compileKernel(BufferDescriptor const &srcDesc,
                                BufferDescriptor const &dstDesc,
                                BufferDescriptor const &duDesc,
                                BufferDescriptor const &dvDesc,
                                BufferDescriptor const &duuDesc,
                                BufferDescriptor const &duvDesc,
                                BufferDescriptor const &dvvDesc,
                                bool use_eval_stencil_kernel,
                                int workGroupSize)
{
  using namespace blender::gpu::shader;
  ShaderCreateInfo info("opensubdiv_compute_eval");
  info.local_group_size(workGroupSize, 1, 1);
  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
    info.define("OSD_PATCH_BASIS_METAL");
  }
  else {
    info.define("OSD_PATCH_BASIS_GLSL");
  }
  if (use_eval_stencil_kernel) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS");
  }
  else {
    info.define("OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES");
  }

  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
  // work group size as that requires extensions. This allows us to compile less shaders and
  // improve overall performance. Adding length as specialization constant will not work as it is
  // used to define an array length. This is not supported by Metal.
  std::string length = std::to_string(srcDesc.length);
  std::string src_stride = std::to_string(srcDesc.stride);
  std::string dst_stride = std::to_string(dstDesc.stride);
  std::string work_group_size = std::to_string(workGroupSize);
  info.define("LENGTH", length);
  info.define("SRC_STRIDE", src_stride);
  info.define("DST_STRIDE", dst_stride);
  info.define("WORK_GROUP_SIZE", work_group_size);
  info.typedef_source("osd_patch_basis.glsl");
  info.storage_buf(
      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::READ, "float", "srcVertexBuffer[]");
  info.storage_buf(
      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::WRITE, "float", "dstVertexBuffer[]");
  info.push_constant(Type::INT, "srcOffset");
  info.push_constant(Type::INT, "dstOffset");

  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
  bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
  if (deriv1) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duBuffer[]");
    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvBuffer[]");
    info.push_constant(Type::IVEC3, "duDesc");
    info.push_constant(Type::IVEC3, "dvDesc");
  }
  if (deriv2) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES");
    info.storage_buf(SHADER_DUU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duuBuffer[]");
    info.storage_buf(SHADER_DUV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duvBuffer[]");
    info.storage_buf(SHADER_DVV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvvBuffer[]");
    info.push_constant(Type::IVEC3, "duuDesc");
    info.push_constant(Type::IVEC3, "duvDesc");
    info.push_constant(Type::IVEC3, "dvvDesc");
  }

  if (use_eval_stencil_kernel) {
    info.storage_buf(SHADER_SIZES_BUF_SLOT, Qualifier::READ, "int", "sizes_buf[]");
    info.storage_buf(SHADER_OFFSETS_BUF_SLOT, Qualifier::READ, "int", "offsets_buf[]");
    info.storage_buf(SHADER_INDICES_BUF_SLOT, Qualifier::READ, "int", "indices_buf[]");
    info.storage_buf(SHADER_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "weights_buf[]");
    if (deriv1) {
      info.storage_buf(
          SHADER_DU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "du_weights_buf[]");
      info.storage_buf(
          SHADER_DV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dv_weights_buf[]");
    }
    if (deriv2) {
      info.storage_buf(
          SHADER_DUU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duu_weights_buf[]");
      info.storage_buf(
          SHADER_DUV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duv_weights_buf[]");
      info.storage_buf(
          SHADER_DVV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvv_weights_buf[]");
    }
    info.push_constant(Type::INT, "batchStart");
    info.push_constant(Type::INT, "batchEnd");
  }
  else {
    info.storage_buf(SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT,
                     Qualifier::READ,
                     "OsdPatchArray",
                     "patchArrayBuffer[]");
    info.storage_buf(
        SHADER_PATCH_COORDS_BUF_SLOT, Qualifier::READ, "OsdPatchCoord", "patchCoords[]");
    info.storage_buf(
        SHADER_PATCH_INDEX_BUFFER_BUF_SLOT, Qualifier::READ, "int", "patchIndexBuffer[]");
    info.storage_buf(SHADER_PATCH_PARAM_BUFFER_BUF_SLOT,
                     Qualifier::READ,
                     "OsdPatchParam",
                     "patchParamBuffer[]");
  }
  info.compute_source("osd_kernel_comp.glsl");
  GPUShader *shader = GPU_shader_create_from_info(
      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
  return shader;
}

bool GPUComputeEvaluator::Compile(BufferDescriptor const &srcDesc,
                                  BufferDescriptor const &dstDesc,
                                  BufferDescriptor const &duDesc,
@@ -581,6 +474,90 @@ GPUComputeEvaluator::_StencilKernel::~_StencilKernel()
    shader = nullptr;
  }
}
static GPUShader *compile_eval_stencil_shader(BufferDescriptor const &srcDesc,
                                              BufferDescriptor const &dstDesc,
                                              BufferDescriptor const &duDesc,
                                              BufferDescriptor const &dvDesc,
                                              BufferDescriptor const &duuDesc,
                                              BufferDescriptor const &duvDesc,
                                              BufferDescriptor const &dvvDesc,
                                              int workGroupSize)
{
  using namespace blender::gpu::shader;
  ShaderCreateInfo info("opensubdiv_compute_eval");
  info.local_group_size(workGroupSize, 1, 1);
  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
    info.define("OSD_PATCH_BASIS_METAL");
  }
  else {
    info.define("OSD_PATCH_BASIS_GLSL");
  }

  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
  // work group size as that requires extensions. This allows us to compile less shaders and
  // improve overall performance. Adding length as specialization constant will not work as it is
  // used to define an array length. This is not supported by Metal.
  std::string length = std::to_string(srcDesc.length);
  std::string src_stride = std::to_string(srcDesc.stride);
  std::string dst_stride = std::to_string(dstDesc.stride);
  std::string work_group_size = std::to_string(workGroupSize);
  info.define("LENGTH", length);
  info.define("SRC_STRIDE", src_stride);
  info.define("DST_STRIDE", dst_stride);
  info.define("WORK_GROUP_SIZE", work_group_size);
  info.typedef_source("osd_patch_basis.glsl");
  info.storage_buf(
      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::READ, "float", "srcVertexBuffer[]");
  info.storage_buf(
      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::WRITE, "float", "dstVertexBuffer[]");
  info.push_constant(Type::INT, "srcOffset");
  info.push_constant(Type::INT, "dstOffset");

  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
  bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
  if (deriv1) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duBuffer[]");
    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvBuffer[]");
    info.push_constant(Type::IVEC3, "duDesc");
    info.push_constant(Type::IVEC3, "dvDesc");
  }
  if (deriv2) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES");
    info.storage_buf(SHADER_DUU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duuBuffer[]");
    info.storage_buf(SHADER_DUV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duvBuffer[]");
    info.storage_buf(SHADER_DVV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvvBuffer[]");
    info.push_constant(Type::IVEC3, "duuDesc");
    info.push_constant(Type::IVEC3, "duvDesc");
    info.push_constant(Type::IVEC3, "dvvDesc");
  }

  info.storage_buf(SHADER_SIZES_BUF_SLOT, Qualifier::READ, "int", "sizes_buf[]");
  info.storage_buf(SHADER_OFFSETS_BUF_SLOT, Qualifier::READ, "int", "offsets_buf[]");
  info.storage_buf(SHADER_INDICES_BUF_SLOT, Qualifier::READ, "int", "indices_buf[]");
  info.storage_buf(SHADER_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "weights_buf[]");
  if (deriv1) {
    info.storage_buf(
        SHADER_DU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "du_weights_buf[]");
    info.storage_buf(
        SHADER_DV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dv_weights_buf[]");
  }
  if (deriv2) {
    info.storage_buf(
        SHADER_DUU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duu_weights_buf[]");
    info.storage_buf(
        SHADER_DUV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duv_weights_buf[]");
    info.storage_buf(
        SHADER_DVV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvv_weights_buf[]");
  }
  info.push_constant(Type::INT, "batchStart");
  info.push_constant(Type::INT, "batchEnd");

  info.compute_source("osd_eval_stencils_comp.glsl");
  GPUShader *shader = GPU_shader_create_from_info(
      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
  return shader;
}

bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
                                                  BufferDescriptor const &dstDesc,
@@ -596,8 +573,8 @@ bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDes
    shader = nullptr;
  }

  shader = compileKernel(
      srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, true, workGroupSize);
  shader = compile_eval_stencil_shader(
      srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, workGroupSize);
  if (shader == nullptr) {
    return false;
  }
@@ -627,6 +604,79 @@ GPUComputeEvaluator::_PatchKernel::~_PatchKernel()
  }
}

static GPUShader *compile_eval_patches_shader(BufferDescriptor const &srcDesc,
                                              BufferDescriptor const &dstDesc,
                                              BufferDescriptor const &duDesc,
                                              BufferDescriptor const &dvDesc,
                                              BufferDescriptor const &duuDesc,
                                              BufferDescriptor const &duvDesc,
                                              BufferDescriptor const &dvvDesc,
                                              int workGroupSize)
{
  using namespace blender::gpu::shader;
  ShaderCreateInfo info("opensubdiv_compute_eval");
  info.local_group_size(workGroupSize, 1, 1);
  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
    info.define("OSD_PATCH_BASIS_METAL");
  }
  else {
    info.define("OSD_PATCH_BASIS_GLSL");
  }

  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
  // work group size as that requires extensions. This allows us to compile less shaders and
  // improve overall performance. Adding length as specialization constant will not work as it is
  // used to define an array length. This is not supported by Metal.
  std::string length = std::to_string(srcDesc.length);
  std::string src_stride = std::to_string(srcDesc.stride);
  std::string dst_stride = std::to_string(dstDesc.stride);
  std::string work_group_size = std::to_string(workGroupSize);
  info.define("LENGTH", length);
  info.define("SRC_STRIDE", src_stride);
  info.define("DST_STRIDE", dst_stride);
  info.define("WORK_GROUP_SIZE", work_group_size);
  info.typedef_source("osd_patch_basis.glsl");
  info.storage_buf(
      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::READ, "float", "srcVertexBuffer[]");
  info.storage_buf(
      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::WRITE, "float", "dstVertexBuffer[]");
  info.push_constant(Type::INT, "srcOffset");
  info.push_constant(Type::INT, "dstOffset");

  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
  bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
  if (deriv1) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duBuffer[]");
    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvBuffer[]");
    info.push_constant(Type::IVEC3, "duDesc");
    info.push_constant(Type::IVEC3, "dvDesc");
  }
  if (deriv2) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES");
    info.storage_buf(SHADER_DUU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duuBuffer[]");
    info.storage_buf(SHADER_DUV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duvBuffer[]");
    info.storage_buf(SHADER_DVV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvvBuffer[]");
    info.push_constant(Type::IVEC3, "duuDesc");
    info.push_constant(Type::IVEC3, "duvDesc");
    info.push_constant(Type::IVEC3, "dvvDesc");
  }

  info.storage_buf(
      SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT, Qualifier::READ, "OsdPatchArray", "patchArrayBuffer[]");
  info.storage_buf(
      SHADER_PATCH_COORDS_BUF_SLOT, Qualifier::READ, "OsdPatchCoord", "patchCoords[]");
  info.storage_buf(
      SHADER_PATCH_INDEX_BUFFER_BUF_SLOT, Qualifier::READ, "int", "patchIndexBuffer[]");
  info.storage_buf(
      SHADER_PATCH_PARAM_BUFFER_BUF_SLOT, Qualifier::READ, "OsdPatchParam", "patchParamBuffer[]");

  info.compute_source("osd_eval_patches_comp.glsl");
  GPUShader *shader = GPU_shader_create_from_info(
      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
  return shader;
}

bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
                                                BufferDescriptor const &dstDesc,
                                                BufferDescriptor const &duDesc,
@@ -641,8 +691,8 @@ bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
    shader = nullptr;
  }

  shader = compileKernel(
      srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, false, workGroupSize);
  shader = compile_eval_patches_shader(
      srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, workGroupSize);
  if (shader == nullptr) {
    return false;
  }
@@ -24,15 +24,6 @@

//------------------------------------------------------------------------------

#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
uint getGlobalInvocationIndex()
{
  uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
  return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row;
}
#endif

#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
OsdPatchCoord GetPatchCoord(int coordIndex)
{
  return patchCoords[coordIndex];
@@ -47,7 +38,6 @@ OsdPatchParam GetPatchParam(int patchIndex)
{
  return patchParamBuffer[patchIndex];
}
#endif

//------------------------------------------------------------------------------
@@ -132,76 +122,6 @@ void writeDvv(int index, Vertex dvv)
#endif

//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)

void main()
{
  int current = int(getGlobalInvocationIndex()) + batchStart;

  if (current >= batchEnd) {
    return;
  }

  Vertex dst;
  clear(dst);

  int offset = offsets_buf[current], size = sizes_buf[current];

  for (int stencil = 0; stencil < size; ++stencil) {
    int vindex = offset + stencil;
    addWithWeight(dst, readVertex(indices_buf[vindex]), weights_buf[vindex]);
  }

  writeVertex(current, dst);

# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
  Vertex du, dv;
  clear(du);
  clear(dv);
  for (int i = 0; i < size; ++i) {
    // expects the compiler optimizes readVertex out here.
    Vertex src = readVertex(indices_buf[offset + i]);
    addWithWeight(du, src, du_weights_buf[offset + i]);
    addWithWeight(dv, src, dv_weights_buf[offset + i]);
  }

  if (duDesc.y > 0) { // length
    writeDu(current, du);
  }
  if (dvDesc.y > 0) {
    writeDv(current, dv);
  }
# endif
# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
  Vertex duu, duv, dvv;
  clear(duu);
  clear(duv);
  clear(dvv);
  for (int i = 0; i < size; ++i) {
    // expects the compiler optimizes readVertex out here.
    Vertex src = readVertex(indices_buf[offset + i]);
    addWithWeight(duu, src, duu_weights_buf[offset + i]);
    addWithWeight(duv, src, duv_weights_buf[offset + i]);
    addWithWeight(dvv, src, dvv_weights_buf[offset + i]);
  }

  if (duuDesc.y > 0) { // length
    writeDuu(current, duu);
  }
  if (duvDesc.y > 0) {
    writeDuv(current, duv);
  }
  if (dvvDesc.y > 0) {
    writeDvv(current, dvv);
  }
# endif
}

#endif

//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)

// PERFORMANCE: stride could be constant, but not as significant as length

void main()
@@ -240,15 +160,15 @@ void main()
  }
  writeVertex(current, dst);

# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
  if (duDesc.y > 0) { // length
    writeDu(current, du);
  }
  if (dvDesc.y > 0) {
    writeDv(current, dv);
  }
# endif
# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
  if (duuDesc.y > 0) { // length
    writeDuu(current, duu);
  }
@@ -258,7 +178,5 @@ void main()
  if (dvvDesc.y > 0) {
    writeDvv(current, dvv);
  }
# endif
}

#endif
}
@@ -0,0 +1,178 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
//    names, trademarks, service marks, or product names of the Licensor
//    and its affiliates, except as required to comply with Section 4(c) of
//    the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//

//------------------------------------------------------------------------------

uint getGlobalInvocationIndex()
{
  uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
  return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row;
}

//------------------------------------------------------------------------------

struct Vertex {
  float vertexData[LENGTH];
};

void clear(out Vertex v)
{
  for (int i = 0; i < LENGTH; ++i) {
    v.vertexData[i] = 0;
  }
}

Vertex readVertex(int index)
{
  Vertex v;
  int vertexIndex = srcOffset + index * SRC_STRIDE;
  for (int i = 0; i < LENGTH; ++i) {
    v.vertexData[i] = srcVertexBuffer[vertexIndex + i];
  }
  return v;
}

void writeVertex(int index, Vertex v)
{
  int vertexIndex = dstOffset + index * DST_STRIDE;
  for (int i = 0; i < LENGTH; ++i) {
    dstVertexBuffer[vertexIndex + i] = v.vertexData[i];
  }
}

void addWithWeight(inout Vertex v, const Vertex src, float weight)
{
  for (int i = 0; i < LENGTH; ++i) {
    v.vertexData[i] += weight * src.vertexData[i];
  }
}

#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
void writeDu(int index, Vertex du)
{
  int duIndex = duDesc.x + index * duDesc.z;
  for (int i = 0; i < LENGTH; ++i) {
    duBuffer[duIndex + i] = du.vertexData[i];
  }
}

void writeDv(int index, Vertex dv)
{
  int dvIndex = dvDesc.x + index * dvDesc.z;
  for (int i = 0; i < LENGTH; ++i) {
    dvBuffer[dvIndex + i] = dv.vertexData[i];
  }
}
#endif

#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
void writeDuu(int index, Vertex duu)
{
  int duuIndex = duuDesc.x + index * duuDesc.z;
  for (int i = 0; i < LENGTH; ++i) {
    duuBuffer[duuIndex + i] = duu.vertexData[i];
  }
}

void writeDuv(int index, Vertex duv)
{
  int duvIndex = duvDesc.x + index * duvDesc.z;
  for (int i = 0; i < LENGTH; ++i) {
    duvBuffer[duvIndex + i] = duv.vertexData[i];
  }
}

void writeDvv(int index, Vertex dvv)
{
  int dvvIndex = dvvDesc.x + index * dvvDesc.z;
  for (int i = 0; i < LENGTH; ++i) {
    dvvBuffer[dvvIndex + i] = dvv.vertexData[i];
  }
}
#endif

//------------------------------------------------------------------------------

void main()
{
  int current = int(getGlobalInvocationIndex()) + batchStart;

  if (current >= batchEnd) {
    return;
  }

  Vertex dst;
  clear(dst);

  int offset = offsets_buf[current], size = sizes_buf[current];

  for (int stencil = 0; stencil < size; ++stencil) {
    int vindex = offset + stencil;
    addWithWeight(dst, readVertex(indices_buf[vindex]), weights_buf[vindex]);
  }

  writeVertex(current, dst);

#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
  Vertex du, dv;
  clear(du);
  clear(dv);
  for (int i = 0; i < size; ++i) {
    // expects the compiler optimizes readVertex out here.
    Vertex src = readVertex(indices_buf[offset + i]);
    addWithWeight(du, src, du_weights_buf[offset + i]);
    addWithWeight(dv, src, dv_weights_buf[offset + i]);
  }

  if (duDesc.y > 0) { // length
    writeDu(current, du);
  }
  if (dvDesc.y > 0) {
    writeDv(current, dv);
  }
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
  Vertex duu, duv, dvv;
  clear(duu);
  clear(duv);
  clear(dvv);
  for (int i = 0; i < size; ++i) {
    // expects the compiler optimizes readVertex out here.
    Vertex src = readVertex(indices_buf[offset + i]);
    addWithWeight(duu, src, duu_weights_buf[offset + i]);
    addWithWeight(duv, src, duv_weights_buf[offset + i]);
    addWithWeight(dvv, src, dvv_weights_buf[offset + i]);
  }

  if (duuDesc.y > 0) { // length
    writeDuu(current, duu);
  }
  if (duvDesc.y > 0) {
    writeDuv(current, duv);
  }
  if (dvvDesc.y > 0) {
    writeDvv(current, dvv);
  }
#endif
}