Split the OpenSubdiv GPU compute kernel into one shader per evaluation mode.

The combined osd_kernel_comp.glsl (switched at compile time by the
OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS / _EVAL_PATCHES defines) becomes
osd_eval_stencils_comp.glsl and osd_eval_patches_comp.glsl. compileKernel() and
its use_eval_stencil_kernel flag are replaced by compile_eval_stencil_shader()
and compile_eval_patches_shader(), each declaring only the buffers and push
constants its own kernel reads.
---
diff --git a/intern/opensubdiv/CMakeLists.txt b/intern/opensubdiv/CMakeLists.txt
index 01545bcaeb5..c17550531f2 100644
--- a/intern/opensubdiv/CMakeLists.txt
+++ b/intern/opensubdiv/CMakeLists.txt
@@ -91,7 +91,8 @@ if(WITH_OPENSUBDIV)
   endif()
 
   set(GLSL_SRC
-    internal/evaluator/shaders/osd_kernel_comp.glsl
+    internal/evaluator/shaders/osd_eval_stencils_comp.glsl
+    internal/evaluator/shaders/osd_eval_patches_comp.glsl
   )
 
   set(GLSL_C)
diff --git a/intern/opensubdiv/internal/evaluator/gpu_compute_evaluator.cc b/intern/opensubdiv/internal/evaluator/gpu_compute_evaluator.cc
index f41d18a5b3e..02c29f0808c 100644
--- a/intern/opensubdiv/internal/evaluator/gpu_compute_evaluator.cc
+++ b/intern/opensubdiv/internal/evaluator/gpu_compute_evaluator.cc
@@ -135,113 +135,6 @@ GPUComputeEvaluator::~GPUComputeEvaluator()
   }
 }
 
-static GPUShader *compileKernel(BufferDescriptor const &srcDesc,
-                                BufferDescriptor const &dstDesc,
-                                BufferDescriptor const &duDesc,
-                                BufferDescriptor const &dvDesc,
-                                BufferDescriptor const &duuDesc,
-                                BufferDescriptor const &duvDesc,
-                                BufferDescriptor const &dvvDesc,
-                                bool use_eval_stencil_kernel,
-                                int workGroupSize)
-{
-  using namespace blender::gpu::shader;
-  ShaderCreateInfo info("opensubdiv_compute_eval");
-  info.local_group_size(workGroupSize, 1, 1);
-  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
-    info.define("OSD_PATCH_BASIS_METAL");
-  }
-  else {
-    info.define("OSD_PATCH_BASIS_GLSL");
-  }
-  if (use_eval_stencil_kernel) {
-    info.define("OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS");
-  }
-  else {
-    info.define("OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES");
-  }
-
-  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
-  // work group size as that requires extensions. This allows us to compile less shaders and
-  // improve overall performance. Adding length as specialization constant will not work as it is
-  // used to define an array length. This is not supported by Metal.
-  std::string length = std::to_string(srcDesc.length);
-  std::string src_stride = std::to_string(srcDesc.stride);
-  std::string dst_stride = std::to_string(dstDesc.stride);
-  std::string work_group_size = std::to_string(workGroupSize);
-  info.define("LENGTH", length);
-  info.define("SRC_STRIDE", src_stride);
-  info.define("DST_STRIDE", dst_stride);
-  info.define("WORK_GROUP_SIZE", work_group_size);
-  info.typedef_source("osd_patch_basis.glsl");
-  info.storage_buf(
-      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::READ, "float", "srcVertexBuffer[]");
-  info.storage_buf(
-      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::WRITE, "float", "dstVertexBuffer[]");
-  info.push_constant(Type::INT, "srcOffset");
-  info.push_constant(Type::INT, "dstOffset");
-
-  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
-  bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
-  if (deriv1) {
-    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
-    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duBuffer[]");
-    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvBuffer[]");
-    info.push_constant(Type::IVEC3, "duDesc");
-    info.push_constant(Type::IVEC3, "dvDesc");
-  }
-  if (deriv2) {
-    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES");
-    info.storage_buf(SHADER_DUU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duuBuffer[]");
-    info.storage_buf(SHADER_DUV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duvBuffer[]");
-    info.storage_buf(SHADER_DVV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvvBuffer[]");
-    info.push_constant(Type::IVEC3, "duuDesc");
-    info.push_constant(Type::IVEC3, "duvDesc");
-    info.push_constant(Type::IVEC3, "dvvDesc");
-  }
-
-  if (use_eval_stencil_kernel) {
-    info.storage_buf(SHADER_SIZES_BUF_SLOT, Qualifier::READ, "int", "sizes_buf[]");
-    info.storage_buf(SHADER_OFFSETS_BUF_SLOT, Qualifier::READ, "int", "offsets_buf[]");
-    info.storage_buf(SHADER_INDICES_BUF_SLOT, Qualifier::READ, "int", "indices_buf[]");
-    info.storage_buf(SHADER_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "weights_buf[]");
-    if (deriv1) {
-      info.storage_buf(
-          SHADER_DU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "du_weights_buf[]");
-      info.storage_buf(
-          SHADER_DV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dv_weights_buf[]");
-    }
-    if (deriv2) {
-      info.storage_buf(
-          SHADER_DUU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duu_weights_buf[]");
-      info.storage_buf(
-          SHADER_DUV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duv_weights_buf[]");
-      info.storage_buf(
-          SHADER_DVV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvv_weights_buf[]");
-    }
-    info.push_constant(Type::INT, "batchStart");
-    info.push_constant(Type::INT, "batchEnd");
-  }
-  else {
-    info.storage_buf(SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT,
-                     Qualifier::READ,
-                     "OsdPatchArray",
-                     "patchArrayBuffer[]");
-    info.storage_buf(
-        SHADER_PATCH_COORDS_BUF_SLOT, Qualifier::READ, "OsdPatchCoord", "patchCoords[]");
-    info.storage_buf(
-        SHADER_PATCH_INDEX_BUFFER_BUF_SLOT, Qualifier::READ, "int", "patchIndexBuffer[]");
-    info.storage_buf(SHADER_PATCH_PARAM_BUFFER_BUF_SLOT,
-                     Qualifier::READ,
-                     "OsdPatchParam",
-                     "patchParamBuffer[]");
-  }
-  info.compute_source("osd_kernel_comp.glsl");
-  GPUShader *shader = GPU_shader_create_from_info(
-      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
-  return shader;
-}
-
 bool GPUComputeEvaluator::Compile(BufferDescriptor const &srcDesc,
                                   BufferDescriptor const &dstDesc,
                                   BufferDescriptor const &duDesc,
@@ -581,6 +474,90 @@ GPUComputeEvaluator::_StencilKernel::~_StencilKernel()
     shader = nullptr;
   }
 }
+static GPUShader *compile_eval_stencil_shader(BufferDescriptor const &srcDesc,
+                                              BufferDescriptor const &dstDesc,
+                                              BufferDescriptor const &duDesc,
+                                              BufferDescriptor const &dvDesc,
+                                              BufferDescriptor const &duuDesc,
+                                              BufferDescriptor const &duvDesc,
+                                              BufferDescriptor const &dvvDesc,
+                                              int workGroupSize)
+{
+  using namespace blender::gpu::shader;
+  ShaderCreateInfo info("opensubdiv_compute_eval");
+  info.local_group_size(workGroupSize, 1, 1);
+  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
+    info.define("OSD_PATCH_BASIS_METAL");
+  }
+  else {
+    info.define("OSD_PATCH_BASIS_GLSL");
+  }
+
+  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
+  // work group size as that requires extensions. This allows us to compile less shaders and
+  // improve overall performance. Adding length as specialization constant will not work as it is
+  // used to define an array length. This is not supported by Metal.
+  std::string length = std::to_string(srcDesc.length);
+  std::string src_stride = std::to_string(srcDesc.stride);
+  std::string dst_stride = std::to_string(dstDesc.stride);
+  std::string work_group_size = std::to_string(workGroupSize);
+  info.define("LENGTH", length);
+  info.define("SRC_STRIDE", src_stride);
+  info.define("DST_STRIDE", dst_stride);
+  info.define("WORK_GROUP_SIZE", work_group_size);
+  info.typedef_source("osd_patch_basis.glsl");
+  info.storage_buf(
+      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::READ, "float", "srcVertexBuffer[]");
+  info.storage_buf(
+      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::WRITE, "float", "dstVertexBuffer[]");
+  info.push_constant(Type::INT, "srcOffset");
+  info.push_constant(Type::INT, "dstOffset");
+
+  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
+  bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
+  if (deriv1) {
+    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
+    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duBuffer[]");
+    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvBuffer[]");
+    info.push_constant(Type::IVEC3, "duDesc");
+    info.push_constant(Type::IVEC3, "dvDesc");
+  }
+  if (deriv2) {
+    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES");
+    info.storage_buf(SHADER_DUU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duuBuffer[]");
+    info.storage_buf(SHADER_DUV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duvBuffer[]");
+    info.storage_buf(SHADER_DVV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvvBuffer[]");
+    info.push_constant(Type::IVEC3, "duuDesc");
+    info.push_constant(Type::IVEC3, "duvDesc");
+    info.push_constant(Type::IVEC3, "dvvDesc");
+  }
+
+  info.storage_buf(SHADER_SIZES_BUF_SLOT, Qualifier::READ, "int", "sizes_buf[]");
+  info.storage_buf(SHADER_OFFSETS_BUF_SLOT, Qualifier::READ, "int", "offsets_buf[]");
+  info.storage_buf(SHADER_INDICES_BUF_SLOT, Qualifier::READ, "int", "indices_buf[]");
+  info.storage_buf(SHADER_WEIGHTS_BUF_SLOT, Qualifier::READ, "float", "weights_buf[]");
+  if (deriv1) {
+    info.storage_buf(
+        SHADER_DU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "du_weights_buf[]");
+    info.storage_buf(
+        SHADER_DV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dv_weights_buf[]");
+  }
+  if (deriv2) {
+    info.storage_buf(
+        SHADER_DUU_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duu_weights_buf[]");
+    info.storage_buf(
+        SHADER_DUV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "duv_weights_buf[]");
+    info.storage_buf(
+        SHADER_DVV_WEIGHTS_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvv_weights_buf[]");
+  }
+  info.push_constant(Type::INT, "batchStart");
+  info.push_constant(Type::INT, "batchEnd");
+
+  info.compute_source("osd_eval_stencils_comp.glsl");
+  GPUShader *shader = GPU_shader_create_from_info(
+      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
+  return shader;
+}
 
 bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
                                                   BufferDescriptor const &dstDesc,
@@ -596,8 +573,8 @@ bool GPUComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDes
     shader = nullptr;
   }
 
-  shader = compileKernel(
-      srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, true, workGroupSize);
+  shader = compile_eval_stencil_shader(
+      srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, workGroupSize);
   if (shader == nullptr) {
     return false;
   }
@@ -627,6 +604,79 @@ GPUComputeEvaluator::_PatchKernel::~_PatchKernel()
   }
 }
 
+static GPUShader *compile_eval_patches_shader(BufferDescriptor const &srcDesc,
+                                              BufferDescriptor const &dstDesc,
+                                              BufferDescriptor const &duDesc,
+                                              BufferDescriptor const &dvDesc,
+                                              BufferDescriptor const &duuDesc,
+                                              BufferDescriptor const &duvDesc,
+                                              BufferDescriptor const &dvvDesc,
+                                              int workGroupSize)
+{
+  using namespace blender::gpu::shader;
+  ShaderCreateInfo info("opensubdiv_compute_eval");
+  info.local_group_size(workGroupSize, 1, 1);
+  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
+    info.define("OSD_PATCH_BASIS_METAL");
+  }
+  else {
+    info.define("OSD_PATCH_BASIS_GLSL");
+  }
+
+  // TODO: use specialization constants for src_stride, dst_stride. Not sure we can use
+  // work group size as that requires extensions. This allows us to compile less shaders and
+  // improve overall performance. Adding length as specialization constant will not work as it is
+  // used to define an array length. This is not supported by Metal.
+  std::string length = std::to_string(srcDesc.length);
+  std::string src_stride = std::to_string(srcDesc.stride);
+  std::string dst_stride = std::to_string(dstDesc.stride);
+  std::string work_group_size = std::to_string(workGroupSize);
+  info.define("LENGTH", length);
+  info.define("SRC_STRIDE", src_stride);
+  info.define("DST_STRIDE", dst_stride);
+  info.define("WORK_GROUP_SIZE", work_group_size);
+  info.typedef_source("osd_patch_basis.glsl");
+  info.storage_buf(
+      SHADER_SRC_VERTEX_BUFFER_BUF_SLOT, Qualifier::READ, "float", "srcVertexBuffer[]");
+  info.storage_buf(
+      SHADER_DST_VERTEX_BUFFER_BUF_SLOT, Qualifier::WRITE, "float", "dstVertexBuffer[]");
+  info.push_constant(Type::INT, "srcOffset");
+  info.push_constant(Type::INT, "dstOffset");
+
+  bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
+  bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
+  if (deriv1) {
+    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES");
+    info.storage_buf(SHADER_DU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duBuffer[]");
+    info.storage_buf(SHADER_DV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvBuffer[]");
+    info.push_constant(Type::IVEC3, "duDesc");
+    info.push_constant(Type::IVEC3, "dvDesc");
+  }
+  if (deriv2) {
+    info.define("OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES");
+    info.storage_buf(SHADER_DUU_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duuBuffer[]");
+    info.storage_buf(SHADER_DUV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "duvBuffer[]");
+    info.storage_buf(SHADER_DVV_BUFFER_BUF_SLOT, Qualifier::READ_WRITE, "float", "dvvBuffer[]");
+    info.push_constant(Type::IVEC3, "duuDesc");
+    info.push_constant(Type::IVEC3, "duvDesc");
+    info.push_constant(Type::IVEC3, "dvvDesc");
+  }
+
+  info.storage_buf(
+      SHADER_PATCH_ARRAY_BUFFER_BUF_SLOT, Qualifier::READ, "OsdPatchArray", "patchArrayBuffer[]");
+  info.storage_buf(
+      SHADER_PATCH_COORDS_BUF_SLOT, Qualifier::READ, "OsdPatchCoord", "patchCoords[]");
+  info.storage_buf(
+      SHADER_PATCH_INDEX_BUFFER_BUF_SLOT, Qualifier::READ, "int", "patchIndexBuffer[]");
+  info.storage_buf(
+      SHADER_PATCH_PARAM_BUFFER_BUF_SLOT, Qualifier::READ, "OsdPatchParam", "patchParamBuffer[]");
+
+  info.compute_source("osd_eval_patches_comp.glsl");
+  GPUShader *shader = GPU_shader_create_from_info(
+      reinterpret_cast<const GPUShaderCreateInfo *>(&info));
+  return shader;
+}
+
 bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
                                                 BufferDescriptor const &dstDesc,
                                                 BufferDescriptor const &duDesc,
@@ -641,8 +691,8 @@ bool GPUComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
     shader = nullptr;
   }
 
-  shader = compileKernel(
-      srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, false, workGroupSize);
+  shader = compile_eval_patches_shader(
+      srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, workGroupSize);
   if (shader == nullptr) {
     return false;
   }
diff --git a/intern/opensubdiv/internal/evaluator/shaders/osd_kernel_comp.glsl b/intern/opensubdiv/internal/evaluator/shaders/osd_eval_patches_comp.glsl
similarity index 68%
rename from intern/opensubdiv/internal/evaluator/shaders/osd_kernel_comp.glsl
rename to intern/opensubdiv/internal/evaluator/shaders/osd_eval_patches_comp.glsl
index 7726ebd593b..da3b2bb025e 100644
--- a/intern/opensubdiv/internal/evaluator/shaders/osd_kernel_comp.glsl
+++ b/intern/opensubdiv/internal/evaluator/shaders/osd_eval_patches_comp.glsl
@@ -24,15 +24,6 @@
 
 //------------------------------------------------------------------------------
 
-#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
-uint getGlobalInvocationIndex()
-{
-  uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
-  return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row;
-}
-#endif
-
-#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
 OsdPatchCoord GetPatchCoord(int coordIndex)
 {
   return patchCoords[coordIndex];
@@ -47,7 +38,6 @@ OsdPatchParam GetPatchParam(int patchIndex)
 {
   return patchParamBuffer[patchIndex];
 }
-#endif
 
 //------------------------------------------------------------------------------
 
@@ -132,76 +122,6 @@ void writeDvv(int index, Vertex dvv)
 #endif
 
 //------------------------------------------------------------------------------
-#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
-
-void main()
-{
-  int current = int(getGlobalInvocationIndex()) + batchStart;
-
-  if (current >= batchEnd) {
-    return;
-  }
-
-  Vertex dst;
-  clear(dst);
-
-  int offset = offsets_buf[current], size = sizes_buf[current];
-
-  for (int stencil = 0; stencil < size; ++stencil) {
-    int vindex = offset + stencil;
-    addWithWeight(dst, readVertex(indices_buf[vindex]), weights_buf[vindex]);
-  }
-
-  writeVertex(current, dst);
-
-#  if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
-  Vertex du, dv;
-  clear(du);
-  clear(dv);
-  for (int i = 0; i < size; ++i) {
-    // expects the compiler optimizes readVertex out here.
-    Vertex src = readVertex(indices_buf[offset + i]);
-    addWithWeight(du, src, du_weights_buf[offset + i]);
-    addWithWeight(dv, src, dv_weights_buf[offset + i]);
-  }
-
-  if (duDesc.y > 0) {  // length
-    writeDu(current, du);
-  }
-  if (dvDesc.y > 0) {
-    writeDv(current, dv);
-  }
-#  endif
-#  if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
-  Vertex duu, duv, dvv;
-  clear(duu);
-  clear(duv);
-  clear(dvv);
-  for (int i = 0; i < size; ++i) {
-    // expects the compiler optimizes readVertex out here.
-    Vertex src = readVertex(indices_buf[offset + i]);
-    addWithWeight(duu, src, duu_weights_buf[offset + i]);
-    addWithWeight(duv, src, duv_weights_buf[offset + i]);
-    addWithWeight(dvv, src, dvv_weights_buf[offset + i]);
-  }
-
-  if (duuDesc.y > 0) {  // length
-    writeDuu(current, duu);
-  }
-  if (duvDesc.y > 0) {
-    writeDuv(current, duv);
-  }
-  if (dvvDesc.y > 0) {
-    writeDvv(current, dvv);
-  }
-#  endif
-}
-
-#endif
-
-//------------------------------------------------------------------------------
-#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
-
 // PERFORMANCE: stride could be constant, but not as significant as length
 
 void main()
@@ -240,15 +160,15 @@ void main()
   }
   writeVertex(current, dst);
 
-#  if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
   if (duDesc.y > 0) {  // length
     writeDu(current, du);
   }
   if (dvDesc.y > 0) {
     writeDv(current, dv);
   }
-#  endif
-#  if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+#endif
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
   if (duuDesc.y > 0) {  // length
     writeDuu(current, duu);
   }
@@ -258,7 +178,5 @@ void main()
   if (dvvDesc.y > 0) {
     writeDvv(current, dvv);
   }
-#  endif
-}
-
 #endif
+}
diff --git a/intern/opensubdiv/internal/evaluator/shaders/osd_eval_stencils_comp.glsl b/intern/opensubdiv/internal/evaluator/shaders/osd_eval_stencils_comp.glsl
new file mode 100644
index 00000000000..a396662c983
--- /dev/null
+++ b/intern/opensubdiv/internal/evaluator/shaders/osd_eval_stencils_comp.glsl
@@ -0,0 +1,178 @@
+//
+//   Copyright 2013 Pixar
+//
+//   Licensed under the Apache License, Version 2.0 (the "Apache License")
+//   with the following modification; you may not use this file except in
+//   compliance with the Apache License and the following modification to it:
+//   Section 6. Trademarks. is deleted and replaced with:
+//
+//   6. Trademarks. This License does not grant permission to use the trade
+//      names, trademarks, service marks, or product names of the Licensor
+//      and its affiliates, except as required to comply with Section 4(c) of
+//      the License and to reproduce the content of the NOTICE file.
+//
+//   You may obtain a copy of the Apache License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the Apache License with the above modification is
+//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//   KIND, either express or implied. See the Apache License for the specific
+//   language governing permissions and limitations under the Apache License.
+//
+
+//------------------------------------------------------------------------------
+
+uint getGlobalInvocationIndex()
+{
+  uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
+  return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row;
+}
+
+//------------------------------------------------------------------------------
+
+struct Vertex {
+  float vertexData[LENGTH];
+};
+
+void clear(out Vertex v)
+{
+  for (int i = 0; i < LENGTH; ++i) {
+    v.vertexData[i] = 0;
+  }
+}
+
+Vertex readVertex(int index)
+{
+  Vertex v;
+  int vertexIndex = srcOffset + index * SRC_STRIDE;
+  for (int i = 0; i < LENGTH; ++i) {
+    v.vertexData[i] = srcVertexBuffer[vertexIndex + i];
+  }
+  return v;
+}
+
+void writeVertex(int index, Vertex v)
+{
+  int vertexIndex = dstOffset + index * DST_STRIDE;
+  for (int i = 0; i < LENGTH; ++i) {
+    dstVertexBuffer[vertexIndex + i] = v.vertexData[i];
+  }
+}
+
+void addWithWeight(inout Vertex v, const Vertex src, float weight)
+{
+  for (int i = 0; i < LENGTH; ++i) {
+    v.vertexData[i] += weight * src.vertexData[i];
+  }
+}
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+void writeDu(int index, Vertex du)
+{
+  int duIndex = duDesc.x + index * duDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    duBuffer[duIndex + i] = du.vertexData[i];
+  }
+}
+
+void writeDv(int index, Vertex dv)
+{
+  int dvIndex = dvDesc.x + index * dvDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    dvBuffer[dvIndex + i] = dv.vertexData[i];
+  }
+}
+#endif
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+void writeDuu(int index, Vertex duu)
+{
+  int duuIndex = duuDesc.x + index * duuDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    duuBuffer[duuIndex + i] = duu.vertexData[i];
+  }
+}
+
+void writeDuv(int index, Vertex duv)
+{
+  int duvIndex = duvDesc.x + index * duvDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    duvBuffer[duvIndex + i] = duv.vertexData[i];
+  }
+}
+
+void writeDvv(int index, Vertex dvv)
+{
+  int dvvIndex = dvvDesc.x + index * dvvDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    dvvBuffer[dvvIndex + i] = dvv.vertexData[i];
+  }
+}
+#endif
+
+//------------------------------------------------------------------------------
+
+void main()
+{
+  int current = int(getGlobalInvocationIndex()) + batchStart;
+
+  if (current >= batchEnd) {
+    return;
+  }
+
+  Vertex dst;
+  clear(dst);
+
+  int offset = offsets_buf[current], size = sizes_buf[current];
+
+  for (int stencil = 0; stencil < size; ++stencil) {
+    int vindex = offset + stencil;
+    addWithWeight(dst, readVertex(indices_buf[vindex]), weights_buf[vindex]);
+  }
+
+  writeVertex(current, dst);
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+  Vertex du, dv;
+  clear(du);
+  clear(dv);
+  for (int i = 0; i < size; ++i) {
+    // expects the compiler optimizes readVertex out here.
+    Vertex src = readVertex(indices_buf[offset + i]);
+    addWithWeight(du, src, du_weights_buf[offset + i]);
+    addWithWeight(dv, src, dv_weights_buf[offset + i]);
+  }
+
+  if (duDesc.y > 0) {  // length
+    writeDu(current, du);
+  }
+  if (dvDesc.y > 0) {
+    writeDv(current, dv);
+  }
+#endif
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+  Vertex duu, duv, dvv;
+  clear(duu);
+  clear(duv);
+  clear(dvv);
+  for (int i = 0; i < size; ++i) {
+    // expects the compiler optimizes readVertex out here.
+    Vertex src = readVertex(indices_buf[offset + i]);
+    addWithWeight(duu, src, duu_weights_buf[offset + i]);
+    addWithWeight(duv, src, duv_weights_buf[offset + i]);
+    addWithWeight(dvv, src, dvv_weights_buf[offset + i]);
+  }
+
+  if (duuDesc.y > 0) {  // length
+    writeDuu(current, duu);
+  }
+  if (duvDesc.y > 0) {
+    writeDuv(current, duv);
+  }
+  if (dvvDesc.y > 0) {
+    writeDvv(current, dvv);
+  }
+#endif
+}