Subdiv: Split eval shaders

Both eval shaders were implemented in a single file, osd_kernel_comp.glsl. This PR
separates them into dedicated compute-shader files so each shader is easier to understand.

Pull Request: https://projects.blender.org/blender/blender/pulls/135719
This commit is contained in:
Jeroen Bakker
2025-03-10 16:06:00 +01:00
parent 5a029fdf1f
commit 46cfba075d
4 changed files with 345 additions and 198 deletions

View File

@@ -91,7 +91,8 @@ if(WITH_OPENSUBDIV)
endif()
set(GLSL_SRC
internal/evaluator/shaders/osd_kernel_comp.glsl
internal/evaluator/shaders/osd_eval_stencils_comp.glsl
internal/evaluator/shaders/osd_eval_patches_comp.glsl
)
set(GLSL_C)

View File

@@ -135,113 +135,6 @@ GPUComputeEvaluator::~GPUComputeEvaluator()
}
}
/* Compile the combined subdivision evaluation compute shader
 * (osd_kernel_comp.glsl), which contains both the stencil and the patch
 * evaluation kernels; `use_eval_stencil_kernel` selects which one is enabled
 * via a preprocessor define.
 *
 * Buffer descriptors carry (offset, length, stride); a derivative output is
 * considered requested when its descriptor has a non-zero length.
 * Returns the compiled shader, or nullptr on failure. */
static GPUShader *compileKernel(BufferDescriptor const &srcDesc,
                                BufferDescriptor const &dstDesc,
                                BufferDescriptor const &duDesc,
                                BufferDescriptor const &dvDesc,
                                BufferDescriptor const &duuDesc,
                                BufferDescriptor const &duvDesc,
                                BufferDescriptor const &dvvDesc,
                                bool use_eval_stencil_kernel,
                                int workGroupSize)
{
  using namespace blender::gpu::shader;
  ShaderCreateInfo info("opensubdiv_compute_eval");
  info.local_group_size(workGroupSize, 1, 1);
  /* The patch-basis implementation differs per GPU backend. */
  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
    info.define("OSD_PATCH_BASIS_METAL");
  }
  else {
    info.define("OSD_PATCH_BASIS_GLSL");
  }
  /* Select which of the two kernels in osd_kernel_comp.glsl gets compiled. */
  if (use_eval_stencil_kernel) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS");
  }
  else {
    info.define("OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES");
  }
  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
  // work group size as that requires extensions. This allows us to compile fewer shaders and
  // improve overall performance. Adding length as specialization constant will not work as it is
  // used to define an array length. This is not supported by Metal.
  std::string length = std::to_string(srcDesc.length);
  std::string src_stride = std::to_string(srcDesc.stride);
  std::string dst_stride = std::to_string(dstDesc.stride);
  std::string work_group_size = std::to_string(workGroupSize);
  info.define("LENGTH", length);
  info.define("SRC_STRIDE", src_stride);
  info.define("DST_STRIDE", dst_stride);
  info.define("WORK_GROUP_SIZE", work_group_size);
  info.typedef_source("osd_patch_basis.glsl");
  /* Source/destination primvar buffers shared by both kernels. */
  info.storage_buf(
      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::READ, "float", "srcVertexBuffer[]");
  info.storage_buf(
      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::WRITE, "float", "dstVertexBuffer[]");
  info.push_constant(Type::INT, "srcOffset");
  info.push_constant(Type::INT, "dstOffset");
  /* A derivative output is requested when its descriptor has a length. */
  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
  bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
  if (deriv1) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duBuffer[]");
    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvBuffer[]");
    info.push_constant(Type::IVEC3, "duDesc");
    info.push_constant(Type::IVEC3, "dvDesc");
  }
  if (deriv2) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES");
    info.storage_buf(SHADER_DUU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duuBuffer[]");
    info.storage_buf(SHADER_DUV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duvBuffer[]");
    info.storage_buf(SHADER_DVV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvvBuffer[]");
    info.push_constant(Type::IVEC3, "duuDesc");
    info.push_constant(Type::IVEC3, "duvDesc");
    info.push_constant(Type::IVEC3, "dvvDesc");
  }
  if (use_eval_stencil_kernel) {
    /* Stencil-table inputs and the stencil range for this dispatch. */
    info.storage_buf(SHADER_SIZES_BUF_SLOT, Qualifier::READ, "int", "sizes_buf[]");
    info.storage_buf(SHADER_OFFSETS_BUF_SLOT, Qualifier::READ, "int", "offsets_buf[]");
    info.storage_buf(SHADER_INDICES_BUF_SLOT, Qualifier::READ, "int", "indices_buf[]");
    info.storage_buf(SHADER_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "weights_buf[]");
    if (deriv1) {
      info.storage_buf(
          SHADER_DU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "du_weights_buf[]");
      info.storage_buf(
          SHADER_DV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dv_weights_buf[]");
    }
    if (deriv2) {
      info.storage_buf(
          SHADER_DUU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duu_weights_buf[]");
      info.storage_buf(
          SHADER_DUV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duv_weights_buf[]");
      info.storage_buf(
          SHADER_DVV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvv_weights_buf[]");
    }
    info.push_constant(Type::INT, "batchStart");
    info.push_constant(Type::INT, "batchEnd");
  }
  else {
    /* Patch-evaluation inputs: patch arrays, coords, topology and params. */
    info.storage_buf(SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT,
                     Qualifier::READ,
                     "OsdPatchArray",
                     "patchArrayBuffer[]");
    info.storage_buf(
        SHADER_PATCH_COORDS_BUF_SLOT, Qualifier::READ, "OsdPatchCoord", "patchCoords[]");
    info.storage_buf(
        SHADER_PATCH_INDEX_BUFFER_BUF_SLOT, Qualifier::READ, "int", "patchIndexBuffer[]");
    info.storage_buf(SHADER_PATCH_PARAM_BUFFER_BUF_SLOT,
                     Qualifier::READ,
                     "OsdPatchParam",
                     "patchParamBuffer[]");
  }
  info.compute_source("osd_kernel_comp.glsl");
  GPUShader *shader = GPU_shader_create_from_info(
      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
  return shader;
}
bool GPUComputeEvaluator::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
@@ -581,6 +474,90 @@ GPUComputeEvaluator::_StencilKernel::~_StencilKernel()
shader = nullptr;
}
}
/* Build and compile the stencil-table evaluation compute shader
 * (osd_eval_stencils_comp.glsl).
 *
 * Buffer descriptors carry (offset, length, stride); first/second derivative
 * outputs are enabled when the corresponding descriptor has a non-zero
 * length. Returns the compiled shader, or nullptr on failure. */
static GPUShader *compile_eval_stencil_shader(BufferDescriptor const &srcDesc,
                                              BufferDescriptor const &dstDesc,
                                              BufferDescriptor const &duDesc,
                                              BufferDescriptor const &dvDesc,
                                              BufferDescriptor const &duuDesc,
                                              BufferDescriptor const &duvDesc,
                                              BufferDescriptor const &dvvDesc,
                                              int workGroupSize)
{
  using namespace blender::gpu::shader;
  ShaderCreateInfo info("opensubdiv_compute_eval");
  info.local_group_size(workGroupSize, 1, 1);
  /* The patch-basis implementation differs per GPU backend. */
  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
    info.define("OSD_PATCH_BASIS_METAL");
  }
  else {
    info.define("OSD_PATCH_BASIS_GLSL");
  }
  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
  // work group size as that requires extensions. This allows us to compile fewer shaders and
  // improve overall performance. Adding length as specialization constant will not work as it is
  // used to define an array length. This is not supported by Metal.
  std::string length = std::to_string(srcDesc.length);
  std::string src_stride = std::to_string(srcDesc.stride);
  std::string dst_stride = std::to_string(dstDesc.stride);
  std::string work_group_size = std::to_string(workGroupSize);
  info.define("LENGTH", length);
  info.define("SRC_STRIDE", src_stride);
  info.define("DST_STRIDE", dst_stride);
  info.define("WORK_GROUP_SIZE", work_group_size);
  info.typedef_source("osd_patch_basis.glsl");
  info.storage_buf(
      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::READ, "float", "srcVertexBuffer[]");
  info.storage_buf(
      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::WRITE, "float", "dstVertexBuffer[]");
  info.push_constant(Type::INT, "srcOffset");
  info.push_constant(Type::INT, "dstOffset");
  /* A derivative output is requested when its descriptor has a length. */
  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
  bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
  if (deriv1) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duBuffer[]");
    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvBuffer[]");
    info.push_constant(Type::IVEC3, "duDesc");
    info.push_constant(Type::IVEC3, "dvDesc");
  }
  if (deriv2) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES");
    info.storage_buf(SHADER_DUU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duuBuffer[]");
    info.storage_buf(SHADER_DUV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duvBuffer[]");
    info.storage_buf(SHADER_DVV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvvBuffer[]");
    info.push_constant(Type::IVEC3, "duuDesc");
    info.push_constant(Type::IVEC3, "duvDesc");
    info.push_constant(Type::IVEC3, "dvvDesc");
  }
  /* Stencil tables are inputs only. */
  info.storage_buf(SHADER_SIZES_BUF_SLOT, Qualifier::READ, "int", "sizes_buf[]");
  info.storage_buf(SHADER_OFFSETS_BUF_SLOT, Qualifier::READ, "int", "offsets_buf[]");
  info.storage_buf(SHADER_INDICES_BUF_SLOT, Qualifier::READ, "int", "indices_buf[]");
  info.storage_buf(SHADER_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "weights_buf[]");
  if (deriv1) {
    /* Derivative weight tables are only ever read by the shader (see
     * osd_eval_stencils_comp.glsl), so bind them READ rather than READ_WRITE:
     * the generated declaration becomes `readonly`, which documents intent
     * and lets drivers optimize. */
    info.storage_buf(SHADER_DU_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "du_weights_buf[]");
    info.storage_buf(SHADER_DV_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "dv_weights_buf[]");
  }
  if (deriv2) {
    info.storage_buf(SHADER_DUU_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "duu_weights_buf[]");
    info.storage_buf(SHADER_DUV_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "duv_weights_buf[]");
    info.storage_buf(SHADER_DVV_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "dvv_weights_buf[]");
  }
  /* Range of stencils processed by this dispatch. */
  info.push_constant(Type::INT, "batchStart");
  info.push_constant(Type::INT, "batchEnd");
  info.compute_source("osd_eval_stencils_comp.glsl");
  GPUShader *shader = GPU_shader_create_from_info(
      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
  return shader;
}
bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
@@ -596,8 +573,8 @@ bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDes
shader = nullptr;
}
shader = compileKernel(
srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, true, workGroupSize);
shader = compile_eval_stencil_shader(
srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, workGroupSize);
if (shader == nullptr) {
return false;
}
@@ -627,6 +604,79 @@ GPUComputeEvaluator::_PatchKernel::~_PatchKernel()
}
}
/* Build and compile the patch evaluation compute shader
 * (osd_eval_patches_comp.glsl).
 *
 * Buffer descriptors carry (offset, length, stride); first/second derivative
 * outputs are enabled when the corresponding descriptor has a non-zero
 * length. Returns the compiled shader, or nullptr on failure. */
static GPUShader *compile_eval_patches_shader(BufferDescriptor const &srcDesc,
                                              BufferDescriptor const &dstDesc,
                                              BufferDescriptor const &duDesc,
                                              BufferDescriptor const &dvDesc,
                                              BufferDescriptor const &duuDesc,
                                              BufferDescriptor const &duvDesc,
                                              BufferDescriptor const &dvvDesc,
                                              int workGroupSize)
{
  using namespace blender::gpu::shader;

  /* Derivative outputs are requested when their descriptors have a length. */
  const bool has_first_derivatives = duDesc.length > 0 || dvDesc.length > 0;
  const bool has_second_derivatives = duuDesc.length > 0 || duvDesc.length > 0 ||
                                      dvvDesc.length > 0;

  ShaderCreateInfo info("opensubdiv_compute_eval");
  info.local_group_size(workGroupSize, 1, 1);

  /* The patch-basis implementation differs per GPU backend. */
  info.define(GPU_backend_get_type() == GPU_BACKEND_METAL ? "OSD_PATCH_BASIS_METAL" :
                                                            "OSD_PATCH_BASIS_GLSL");

  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
  // work group size as that requires extensions. This allows us to compile fewer shaders and
  // improve overall performance. Adding length as specialization constant will not work as it is
  // used to define an array length. This is not supported by Metal.
  const std::string length_def = std::to_string(srcDesc.length);
  const std::string src_stride_def = std::to_string(srcDesc.stride);
  const std::string dst_stride_def = std::to_string(dstDesc.stride);
  const std::string group_size_def = std::to_string(workGroupSize);
  info.define("LENGTH", length_def);
  info.define("SRC_STRIDE", src_stride_def);
  info.define("DST_STRIDE", dst_stride_def);
  info.define("WORK_GROUP_SIZE", group_size_def);

  info.typedef_source("osd_patch_basis.glsl");

  /* Primvar source/destination buffers and their base offsets. */
  info.storage_buf(
      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::READ, "float", "srcVertexBuffer[]");
  info.storage_buf(
      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::WRITE, "float", "dstVertexBuffer[]");
  info.push_constant(Type::INT, "srcOffset");
  info.push_constant(Type::INT, "dstOffset");

  if (has_first_derivatives) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duBuffer[]");
    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvBuffer[]");
    info.push_constant(Type::IVEC3, "duDesc");
    info.push_constant(Type::IVEC3, "dvDesc");
  }
  if (has_second_derivatives) {
    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES");
    info.storage_buf(SHADER_DUU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duuBuffer[]");
    info.storage_buf(SHADER_DUV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duvBuffer[]");
    info.storage_buf(SHADER_DVV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvvBuffer[]");
    info.push_constant(Type::IVEC3, "duuDesc");
    info.push_constant(Type::IVEC3, "duvDesc");
    info.push_constant(Type::IVEC3, "dvvDesc");
  }

  /* Patch-evaluation inputs: patch arrays, coords, topology and params. */
  info.storage_buf(
      SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT, Qualifier::READ, "OsdPatchArray", "patchArrayBuffer[]");
  info.storage_buf(
      SHADER_PATCH_COORDS_BUF_SLOT, Qualifier::READ, "OsdPatchCoord", "patchCoords[]");
  info.storage_buf(
      SHADER_PATCH_INDEX_BUFFER_BUF_SLOT, Qualifier::READ, "int", "patchIndexBuffer[]");
  info.storage_buf(
      SHADER_PATCH_PARAM_BUFFER_BUF_SLOT, Qualifier::READ, "OsdPatchParam", "patchParamBuffer[]");

  info.compute_source("osd_eval_patches_comp.glsl");
  return GPU_shader_create_from_info(reinterpret_cast<const GPUShaderCreateInfo *>(&info));
}
bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
@@ -641,8 +691,8 @@ bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
shader = nullptr;
}
shader = compileKernel(
srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, false, workGroupSize);
shader = compile_eval_patches_shader(
srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, workGroupSize);
if (shader == nullptr) {
return false;
}

View File

@@ -24,15 +24,6 @@
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
// Flatten the 2D dispatch grid into a single linear invocation index
// (row-major over gl_GlobalInvocationID.x/y).
uint getGlobalInvocationIndex()
{
  uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
  return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row;
}
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
OsdPatchCoord GetPatchCoord(int coordIndex)
{
return patchCoords[coordIndex];
@@ -47,7 +38,6 @@ OsdPatchParam GetPatchParam(int patchIndex)
{
return patchParamBuffer[patchIndex];
}
#endif
//------------------------------------------------------------------------------
@@ -132,76 +122,6 @@ void writeDvv(int index, Vertex dvv)
#endif
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
// Stencil evaluation entry point: each invocation computes one destination
// vertex (and optional derivatives) as a weighted sum of source vertices.
void main()
{
  // Map this invocation onto its stencil within [batchStart, batchEnd).
  int current = int(getGlobalInvocationIndex()) + batchStart;
  if (current >= batchEnd) {
    return;
  }
  Vertex dst;
  clear(dst);
  // offsets_buf/sizes_buf locate this stencil's run inside indices_buf/weights_buf.
  int offset = offsets_buf[current], size = sizes_buf[current];
  for (int stencil = 0; stencil < size; ++stencil) {
    int vindex = offset + stencil;
    addWithWeight(dst, readVertex(indices_buf[vindex]), weights_buf[vindex]);
  }
  writeVertex(current, dst);
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
  // First derivatives reuse the same stencil indices with dedicated weight tables.
  Vertex du, dv;
  clear(du);
  clear(dv);
  for (int i = 0; i < size; ++i) {
    // expects the compiler optimizes readVertex out here.
    Vertex src = readVertex(indices_buf[offset + i]);
    addWithWeight(du, src, du_weights_buf[offset + i]);
    addWithWeight(dv, src, dv_weights_buf[offset + i]);
  }
  // Descriptor .y holds the length; only write outputs that were requested.
  if (duDesc.y > 0) {  // length
    writeDu(current, du);
  }
  if (dvDesc.y > 0) {
    writeDv(current, dv);
  }
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
  Vertex duu, duv, dvv;
  clear(duu);
  clear(duv);
  clear(dvv);
  for (int i = 0; i < size; ++i) {
    // expects the compiler optimizes readVertex out here.
    Vertex src = readVertex(indices_buf[offset + i]);
    addWithWeight(duu, src, duu_weights_buf[offset + i]);
    addWithWeight(duv, src, duv_weights_buf[offset + i]);
    addWithWeight(dvv, src, dvv_weights_buf[offset + i]);
  }
  if (duuDesc.y > 0) {  // length
    writeDuu(current, duu);
  }
  if (duvDesc.y > 0) {
    writeDuv(current, duv);
  }
  if (dvvDesc.y > 0) {
    writeDvv(current, dvv);
  }
#endif
}
#endif
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
// PERFORMANCE: stride could be constant, but not as significant as length
void main()
@@ -240,15 +160,15 @@ void main()
}
writeVertex(current, dst);
# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
if (duDesc.y > 0) { // length
writeDu(current, du);
}
if (dvDesc.y > 0) {
writeDv(current, dv);
}
# endif
# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
if (duuDesc.y > 0) { // length
writeDuu(current, duu);
}
@@ -258,7 +178,5 @@ void main()
if (dvvDesc.y > 0) {
writeDvv(current, dvv);
}
# endif
}
#endif
}

View File

@@ -0,0 +1,178 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
//------------------------------------------------------------------------------
// Flatten the 2D dispatch grid into one linear invocation index (row-major).
uint getGlobalInvocationIndex()
{
  uint row_width = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
  return gl_GlobalInvocationID.y * row_width + gl_GlobalInvocationID.x;
}
//------------------------------------------------------------------------------
// One primvar element: LENGTH floats of interleaved vertex data.
struct Vertex {
  float vertexData[LENGTH];
};
// Zero-initialize every component of `v`.
void clear(out Vertex v)
{
  for (int c = 0; c < LENGTH; ++c) {
    v.vertexData[c] = 0;
  }
}
// Load vertex `index` from the source buffer, honoring srcOffset/SRC_STRIDE.
Vertex readVertex(int index)
{
  Vertex result;
  int base = srcOffset + index * SRC_STRIDE;
  for (int c = 0; c < LENGTH; ++c) {
    result.vertexData[c] = srcVertexBuffer[base + c];
  }
  return result;
}
// Store `v` at slot `index` in the destination buffer, honoring dstOffset/DST_STRIDE.
void writeVertex(int index, Vertex v)
{
  int base = dstOffset + index * DST_STRIDE;
  for (int c = 0; c < LENGTH; ++c) {
    dstVertexBuffer[base + c] = v.vertexData[c];
  }
}
// Accumulate `src` scaled by `weight` into `v`, component by component.
void addWithWeight(inout Vertex v, const Vertex src, float weight)
{
  for (int c = 0; c < LENGTH; ++c) {
    v.vertexData[c] += src.vertexData[c] * weight;
  }
}
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
// Store the u-derivative; duDesc packs (offset, length, stride) as .x/.y/.z.
void writeDu(int index, Vertex du)
{
  int base = duDesc.x + index * duDesc.z;
  for (int c = 0; c < LENGTH; ++c) {
    duBuffer[base + c] = du.vertexData[c];
  }
}
// Store the v-derivative; dvDesc packs (offset, length, stride) as .x/.y/.z.
void writeDv(int index, Vertex dv)
{
  int base = dvDesc.x + index * dvDesc.z;
  for (int c = 0; c < LENGTH; ++c) {
    dvBuffer[base + c] = dv.vertexData[c];
  }
}
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
// Store the uu second derivative; duuDesc packs (offset, length, stride).
void writeDuu(int index, Vertex duu)
{
  int base = duuDesc.x + index * duuDesc.z;
  for (int c = 0; c < LENGTH; ++c) {
    duuBuffer[base + c] = duu.vertexData[c];
  }
}
// Store the uv mixed second derivative; duvDesc packs (offset, length, stride).
void writeDuv(int index, Vertex duv)
{
  int base = duvDesc.x + index * duvDesc.z;
  for (int c = 0; c < LENGTH; ++c) {
    duvBuffer[base + c] = duv.vertexData[c];
  }
}
// Store the vv second derivative; dvvDesc packs (offset, length, stride).
void writeDvv(int index, Vertex dvv)
{
  int base = dvvDesc.x + index * dvvDesc.z;
  for (int c = 0; c < LENGTH; ++c) {
    dvvBuffer[base + c] = dvv.vertexData[c];
  }
}
#endif
//------------------------------------------------------------------------------
// Stencil evaluation entry point: each invocation computes one destination
// vertex (and optional derivatives) as a weighted sum of source vertices.
void main()
{
  // Map this invocation onto its stencil within [batchStart, batchEnd).
  int current = int(getGlobalInvocationIndex()) + batchStart;
  if (current >= batchEnd) {
    return;
  }
  Vertex dst;
  clear(dst);
  // offsets_buf/sizes_buf locate this stencil's run inside indices_buf/weights_buf.
  int offset = offsets_buf[current], size = sizes_buf[current];
  for (int stencil = 0; stencil < size; ++stencil) {
    int vindex = offset + stencil;
    addWithWeight(dst, readVertex(indices_buf[vindex]), weights_buf[vindex]);
  }
  writeVertex(current, dst);
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
  // First derivatives reuse the same stencil indices with dedicated weight tables.
  Vertex du, dv;
  clear(du);
  clear(dv);
  for (int i = 0; i < size; ++i) {
    // expects the compiler optimizes readVertex out here.
    Vertex src = readVertex(indices_buf[offset + i]);
    addWithWeight(du, src, du_weights_buf[offset + i]);
    addWithWeight(dv, src, dv_weights_buf[offset + i]);
  }
  // Descriptor .y holds the length; only write outputs that were requested.
  if (duDesc.y > 0) {  // length
    writeDu(current, du);
  }
  if (dvDesc.y > 0) {
    writeDv(current, dv);
  }
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
  Vertex duu, duv, dvv;
  clear(duu);
  clear(duv);
  clear(dvv);
  for (int i = 0; i < size; ++i) {
    // expects the compiler optimizes readVertex out here.
    Vertex src = readVertex(indices_buf[offset + i]);
    addWithWeight(duu, src, duu_weights_buf[offset + i]);
    addWithWeight(duv, src, duv_weights_buf[offset + i]);
    addWithWeight(dvv, src, dvv_weights_buf[offset + i]);
  }
  if (duuDesc.y > 0) {  // length
    writeDuu(current, duu);
  }
  if (duvDesc.y > 0) {
    writeDuv(current, duv);
  }
  if (dvvDesc.y > 0) {
    writeDvv(current, dvv);
  }
#endif
}