/* SPDX-FileCopyrightText: 2015 Pixar * * SPDX-License-Identifier: Apache-2.0 */ #ifndef OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_ #define OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_ #include #include #include #include "GPU_storage_buffer.hh" namespace OpenSubdiv::OPENSUBDIV_VERSION::Far { class LimitStencilTable; class StencilTable; } // namespace OpenSubdiv::OPENSUBDIV_VERSION::Far // namespace OPENSUBDIV_VERSION namespace blender::opensubdiv { /// \brief GL stencil table (Shader Storage buffer) /// /// This class is a GLSL SSBO representation of OpenSubdiv::Far::StencilTable. /// /// GLSLComputeKernel consumes this table to apply stencils /// class GPUStencilTableSSBO { public: static GPUStencilTableSSBO *Create(OpenSubdiv::Far::StencilTable const *stencilTable, void *deviceContext = nullptr) { (void)deviceContext; // unused return new GPUStencilTableSSBO(stencilTable); } static GPUStencilTableSSBO *Create(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable, void *deviceContext = nullptr) { (void)deviceContext; // unused return new GPUStencilTableSSBO(limitStencilTable); } explicit GPUStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable); explicit GPUStencilTableSSBO(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable); ~GPUStencilTableSSBO(); // interfaces needed for GLSLComputeKernel GPUStorageBuf *GetSizesBuffer() const { return sizes_buf; } GPUStorageBuf *GetOffsetsBuffer() const { return offsets_buf; } GPUStorageBuf *GetIndicesBuffer() const { return indices_buf; } GPUStorageBuf *GetWeightsBuffer() const { return weights_buf; } GPUStorageBuf *GetDuWeightsBuffer() const { return du_weights_buf; } GPUStorageBuf *GetDvWeightsBuffer() const { return dv_weights_buf; } GPUStorageBuf *GetDuuWeightsBuffer() const { return duu_weights_buf; } GPUStorageBuf *GetDuvWeightsBuffer() const { return duv_weights_buf; } GPUStorageBuf *GetDvvWeightsBuffer() const { return dvv_weights_buf; } int GetNumStencils() const { return _numStencils; } private: GPUStorageBuf *sizes_buf = nullptr; GPUStorageBuf *offsets_buf = nullptr; GPUStorageBuf *indices_buf = nullptr; GPUStorageBuf *weights_buf = nullptr; GPUStorageBuf *du_weights_buf = nullptr; GPUStorageBuf *dv_weights_buf = nullptr; GPUStorageBuf *duu_weights_buf = nullptr; GPUStorageBuf *duv_weights_buf = nullptr; GPUStorageBuf *dvv_weights_buf = nullptr; int _numStencils; }; // --------------------------------------------------------------------------- class GPUComputeEvaluator { public: using Instantiatable = bool; /** * Blender doesn't use 2nd derivatives, but the OSD evaluator cache does expect this constructor * to be present. */ static GPUComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, OpenSubdiv::Osd::BufferDescriptor const & /*duuDesc*/, OpenSubdiv::Osd::BufferDescriptor const & /*duvDesc*/, OpenSubdiv::Osd::BufferDescriptor const & /*dvvDesc*/, void *deviceContext = nullptr) { return Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext); } static GPUComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, void * /*deviceContext*/ = nullptr) { GPUComputeEvaluator *instance = new GPUComputeEvaluator(); if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc)) { return instance; } delete instance; return nullptr; } /// Constructor. GPUComputeEvaluator(); /// Destructor. note that the GL context must be made current. ~GPUComputeEvaluator(); /// ---------------------------------------------------------------------- /// /// Stencil evaluations with StencilTable /// /// ---------------------------------------------------------------------- /// \brief Generic static stencil function. This function has a same /// signature as other device kernels have so that it can be called /// transparently from OsdMesh template interface. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext not used in the GLSL kernel /// template static bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, GPUComputeEvaluator *instance, void *deviceContext = nullptr) { if (instance) { return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable); } // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor()); if (instance) { bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable); delete instance; return r; } return false; } /// \brief Generic static stencil function. This function has a same /// signature as other device kernels have so that it can be called /// transparently from OsdMesh template interface. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the dstBuffer /// /// @param duBuffer Output buffer derivative wrt u /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext not used in the GLSL kernel /// template static bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, STENCIL_TABLE const *stencilTable, GPUComputeEvaluator *instance, void *deviceContext = nullptr) { if (instance) { return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, stencilTable); } // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc); if (instance) { bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, stencilTable); delete instance; return r; } return false; } /// \brief Generic stencil function. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// template bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable) const { return EvalStencils(srcBuffer->get_vertex_buffer(), srcDesc, dstBuffer->get_vertex_buffer(), dstDesc, nullptr, OpenSubdiv::Osd::BufferDescriptor(), nullptr, OpenSubdiv::Osd::BufferDescriptor(), stencilTable->GetSizesBuffer(), stencilTable->GetOffsetsBuffer(), stencilTable->GetIndicesBuffer(), stencilTable->GetWeightsBuffer(), 0, 0, /* start = */ 0, /* end = */ stencilTable->GetNumStencils()); } /// \brief Generic stencil function. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the dstBuffer /// /// @param duBuffer Output buffer derivative wrt u /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// template bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, STENCIL_TABLE const *stencilTable) const { return EvalStencils(srcBuffer->get_vertex_buffer(), srcDesc, dstBuffer->get_vertex_buffer(), dstDesc, duBuffer->get_vertex_buffer(), duDesc, dvBuffer->get_vertex_buffer(), dvDesc, stencilTable->GetSizesBuffer(), stencilTable->GetOffsetsBuffer(), stencilTable->GetIndicesBuffer(), stencilTable->GetWeightsBuffer(), stencilTable->GetDuWeightsBuffer(), stencilTable->GetDvWeightsBuffer(), /* start = */ 0, /* end = */ stencilTable->GetNumStencils()); } /// \brief Dispatch the GLSL compute kernel on GPU asynchronously /// returns false if the kernel hasn't been compiled yet. /// /// @param srcBuffer GL buffer of input primvar source data /// /// @param srcDesc vertex buffer descriptor for the srcBuffer /// /// @param dstBuffer GL buffer of output primvar destination data /// /// @param dstDesc vertex buffer descriptor for the dstBuffer /// /// @param duBuffer GL buffer of output derivative wrt u /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer GL buffer of output derivative wrt v /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param sizesBuffer GL buffer of the sizes in the stencil table /// /// @param offsetsBuffer GL buffer of the offsets in the stencil table /// /// @param indicesBuffer GL buffer of the indices in the stencil table /// /// @param weightsBuffer GL buffer of the weights in the stencil table /// /// @param duWeightsBuffer GL buffer of the du weights in the stencil table /// /// @param dvWeightsBuffer GL buffer of the dv weights in the stencil table /// /// @param start start index of stencil table /// /// @param end end index of stencil table /// bool EvalStencils(gpu::VertBuf *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, gpu::VertBuf *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, gpu::VertBuf *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, gpu::VertBuf *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, GPUStorageBuf *sizesBuffer, GPUStorageBuf *offsetsBuffer, GPUStorageBuf *indicesBuffer, GPUStorageBuf *weightsBuffer, GPUStorageBuf *duWeightsBuffer, GPUStorageBuf *dvWeightsBuffer, int start, int end) const; /// ---------------------------------------------------------------------- /// /// Limit evaluations with PatchTable /// /// ---------------------------------------------------------------------- /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext not used in the GLXFB evaluator /// template static bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GPUComputeEvaluator *instance, void *deviceContext = nullptr) { if (instance) { return instance->EvalPatches( srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); } // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor()); if (instance) { bool r = instance->EvalPatches( srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); delete instance; return r; } return false; } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext not used in the GLXFB evaluator /// template static bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GPUComputeEvaluator *instance, void *deviceContext = nullptr) { if (instance) { return instance->EvalPatches(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable); } // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc); if (instance) { bool r = instance->EvalPatches(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable); delete instance; return r; } return false; } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// template bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable) { return EvalPatches(srcBuffer->get_vertex_buffer(), srcDesc, dstBuffer->get_vertex_buffer(), dstDesc, nullptr, OpenSubdiv::Osd::BufferDescriptor(), nullptr, OpenSubdiv::Osd::BufferDescriptor(), numPatchCoords, patchCoords->get_vertex_buffer(), patchTable->GetPatchArrays(), patchTable->GetPatchIndexBuffer(), patchTable->GetPatchParamBuffer()); } /// \brief Generic limit eval function with derivatives. This function has /// a same signature as other device kernels have so that it can be /// called in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// /// @param patchTable GLPatchTable or equivalent /// template bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable) { return EvalPatches(srcBuffer->get_vertex_buffer(), srcDesc, dstBuffer->get_vertex_buffer(), dstDesc, duBuffer->get_vertex_buffer(), duDesc, dvBuffer->get_vertex_buffer(), dvDesc, numPatchCoords, patchCoords->get_vertex_buffer(), patchTable->GetPatchArrays(), patchTable->GetPatchIndexBuffer(), patchTable->GetPatchParamBuffer()); } bool EvalPatches(gpu::VertBuf *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, gpu::VertBuf *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, gpu::VertBuf *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, gpu::VertBuf *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, int numPatchCoords, gpu::VertBuf *patchCoordsBuffer, const OpenSubdiv::Osd::PatchArrayVector &patchArrays, GPUStorageBuf *patchIndexBuffer, GPUStorageBuf *patchParamsBuffer); /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext not used in the GLXFB evaluator /// template static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GPUComputeEvaluator *instance, void *deviceContext = nullptr) { if (instance) { return instance->EvalPatchesVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); } // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor()); if (instance) { bool r = instance->EvalPatchesVarying( srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); delete instance; return r; } return false; } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// template bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable) { return EvalPatches(srcBuffer->get_vertex_buffer(), srcDesc, dstBuffer->get_vertex_buffer(), dstDesc, nullptr, OpenSubdiv::Osd::BufferDescriptor(), nullptr, OpenSubdiv::Osd::BufferDescriptor(), numPatchCoords, patchCoords->get_vertex_buffer(), patchTable->GetVaryingPatchArrays(), patchTable->GetVaryingPatchIndexBuffer(), patchTable->GetPatchParamBuffer()); } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext not used in the GLXFB evaluator /// template static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GPUComputeEvaluator *instance, void *deviceContext = nullptr) { if (instance) { return instance->EvalPatchesVarying(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable); } // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc); if (instance) { bool r = instance->EvalPatchesVarying(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable); delete instance; return r; } return false; } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// template bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable) { return EvalPatches(srcBuffer->get_vertex_buffer(), srcDesc, dstBuffer->get_vertex_buffer(), dstDesc, duBuffer->get_vertex_buffer(), duDesc, dvBuffer->get_vertex_buffer(), dvDesc, numPatchCoords, patchCoords->get_vertex_buffer(), patchTable->GetVaryingPatchArrays(), patchTable->GetVaryingPatchIndexBuffer(), patchTable->GetPatchParamBuffer()); } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext not used in the GLXFB evaluator /// template static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel, GPUComputeEvaluator *instance, void *deviceContext = nullptr) { if (instance) { return instance->EvalPatchesFaceVarying(srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable, fvarChannel); } // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor()); if (instance) { bool r = instance->EvalPatchesFaceVarying(srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable, fvarChannel); delete instance; return r; } return false; } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// template bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel = 0) { return EvalPatches(srcBuffer->get_vertex_buffer(), srcDesc, dstBuffer->get_vertex_buffer(), dstDesc, 0, OpenSubdiv::Osd::BufferDescriptor(), 0, OpenSubdiv::Osd::BufferDescriptor(), numPatchCoords, patchCoords->get_vertex_buffer(), patchTable->GetFVarPatchArrays(fvarChannel), patchTable->GetFVarPatchIndexBuffer(fvarChannel), patchTable->GetFVarPatchParamBuffer(fvarChannel)); } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although /// it may cause a performance problem. /// /// @param deviceContext not used in the GLXFB evaluator /// template static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel, GPUComputeEvaluator *instance, void *deviceContext = nullptr) { if (instance) { return instance->EvalPatchesFaceVarying(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable, fvarChannel); } // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc, duDesc, dvDesc); if (instance) { bool r = instance->EvalPatchesFaceVarying(srcBuffer, srcDesc, dstBuffer, dstDesc, duBuffer, duDesc, dvBuffer, dvDesc, numPatchCoords, patchCoords, patchTable, fvarChannel); delete instance; return r; } return false; } /// \brief Generic limit eval function. This function has a same /// signature as other device kernels have so that it can be called /// in the same way. /// /// @param srcBuffer Input primvar buffer. /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param duBuffer Output buffer derivative wrt u /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// object of destination data /// /// @param duDesc vertex buffer descriptor for the duBuffer /// /// @param dvBuffer Output buffer derivative wrt v /// Must have `get_vertex_buffer()` returning a `gpu::VertBuf` /// buffer object of destination data /// /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// must have BindVBO() method returning an /// array of PatchCoord struct in VBO. /// /// @param patchTable GLPatchTable or equivalent /// /// @param fvarChannel face-varying channel /// template bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, OpenSubdiv::Osd::BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel = 0) { return EvalPatches(srcBuffer->get_vertex_buffer(), srcDesc, dstBuffer->get_vertex_buffer(), dstDesc, duBuffer->get_vertex_buffer(), duDesc, dvBuffer->get_vertex_buffer(), dvDesc, numPatchCoords, patchCoords->get_vertex_buffer(), patchTable->GetFVarPatchArrays(fvarChannel), patchTable->GetFVarPatchIndexBuffer(fvarChannel), patchTable->GetFVarPatchParamBuffer(fvarChannel)); } /// ---------------------------------------------------------------------- /// /// Other methods /// /// ---------------------------------------------------------------------- /// Configure GLSL kernel. A valid GL context must be made current before /// calling this function. Returns false if it fails to compile the kernel. bool Compile( OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc = OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &dvDesc = OpenSubdiv::Osd::BufferDescriptor()); /// Wait the dispatched kernel finishes. static void Synchronize(void *deviceContext); private: struct _StencilKernel { _StencilKernel(); ~_StencilKernel(); bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, int workGroupSize); GPUShader *shader = nullptr; int uniformStart = 0; int uniformEnd = 0; int uniformSrcOffset = 0; int uniformDstOffset = 0; int uniformDuDesc = 0; int uniformDvDesc = 0; } _stencilKernel; struct _PatchKernel { _PatchKernel(); ~_PatchKernel(); bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc, OpenSubdiv::Osd::BufferDescriptor const &dvDesc, int workGroupSize); GPUShader *shader = nullptr; int uniformSrcOffset = 0; int uniformDstOffset = 0; int uniformDuDesc = 0; int uniformDvDesc = 0; } _patchKernel; int _workGroupSize; GPUStorageBuf *_patchArraysSSBO = nullptr; int GetDispatchSize(int count) const; void DispatchCompute(GPUShader *shader, int totalDispatchSize) const; }; } // namespace blender::opensubdiv #endif // OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_