Files
test2/intern/opensubdiv/internal/evaluator/gpu_compute_evaluator.h
Jeroen Bakker 96c9f480b3 Cleanup: Subdiv: Remove 2nd derivatives
2nd derivatives aren't used within Blender but all
logic branches end up with checking for it. This PR
removes the 2nd derivatives from the code so we
can skip this test.

Pull Request: https://projects.blender.org/blender/blender/pulls/135745
2025-03-11 09:26:35 +01:00

1394 lines
58 KiB
C++

/* SPDX-FileCopyrightText: 2015 Pixar
*
* SPDX-License-Identifier: Apache-2.0 */
#ifndef OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_
#define OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_
#include <opensubdiv/osd/bufferDescriptor.h>
#include <opensubdiv/osd/types.h>
#include <opensubdiv/version.h>
#include "GPU_storage_buffer.hh"
namespace OpenSubdiv::OPENSUBDIV_VERSION::Far {
class LimitStencilTable;
class StencilTable;
} // namespace OpenSubdiv::OPENSUBDIV_VERSION::Far
// namespace OPENSUBDIV_VERSION
namespace blender::opensubdiv {
/// \brief GL stencil table (Shader Storage buffer)
///
/// This class is a GLSL SSBO representation of OpenSubdiv::Far::StencilTable.
///
/// GLSLComputeKernel consumes this table to apply stencils
///
class GPUStencilTableSSBO {
public:
static GPUStencilTableSSBO *Create(OpenSubdiv::Far::StencilTable const *stencilTable,
void *deviceContext = nullptr)
{
(void)deviceContext; // unused
return new GPUStencilTableSSBO(stencilTable);
}
static GPUStencilTableSSBO *Create(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable,
void *deviceContext = nullptr)
{
(void)deviceContext; // unused
return new GPUStencilTableSSBO(limitStencilTable);
}
explicit GPUStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable);
explicit GPUStencilTableSSBO(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable);
~GPUStencilTableSSBO();
// interfaces needed for GLSLComputeKernel
GPUStorageBuf *GetSizesBuffer() const
{
return sizes_buf;
}
GPUStorageBuf *GetOffsetsBuffer() const
{
return offsets_buf;
}
GPUStorageBuf *GetIndicesBuffer() const
{
return indices_buf;
}
GPUStorageBuf *GetWeightsBuffer() const
{
return weights_buf;
}
GPUStorageBuf *GetDuWeightsBuffer() const
{
return du_weights_buf;
}
GPUStorageBuf *GetDvWeightsBuffer() const
{
return dv_weights_buf;
}
GPUStorageBuf *GetDuuWeightsBuffer() const
{
return duu_weights_buf;
}
GPUStorageBuf *GetDuvWeightsBuffer() const
{
return duv_weights_buf;
}
GPUStorageBuf *GetDvvWeightsBuffer() const
{
return dvv_weights_buf;
}
int GetNumStencils() const
{
return _numStencils;
}
private:
GPUStorageBuf *sizes_buf = nullptr;
GPUStorageBuf *offsets_buf = nullptr;
GPUStorageBuf *indices_buf = nullptr;
GPUStorageBuf *weights_buf = nullptr;
GPUStorageBuf *du_weights_buf = nullptr;
GPUStorageBuf *dv_weights_buf = nullptr;
GPUStorageBuf *duu_weights_buf = nullptr;
GPUStorageBuf *duv_weights_buf = nullptr;
GPUStorageBuf *dvv_weights_buf = nullptr;
int _numStencils;
};
// ---------------------------------------------------------------------------
class GPUComputeEvaluator {
public:
using Instantiatable = bool;
/**
* Blender doesn't use 2nd derivatives, but the OSD evaluator cache does expect this constructor
* to be present.
*/
static GPUComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
OpenSubdiv::Osd::BufferDescriptor const & /*duuDesc*/,
OpenSubdiv::Osd::BufferDescriptor const & /*duvDesc*/,
OpenSubdiv::Osd::BufferDescriptor const & /*dvvDesc*/,
void *deviceContext = nullptr)
{
return Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext);
}
static GPUComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
void * /*deviceContext*/ = nullptr)
{
GPUComputeEvaluator *instance = new GPUComputeEvaluator();
if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc)) {
return instance;
}
delete instance;
return nullptr;
}
/// Constructor.
GPUComputeEvaluator();
/// Destructor. note that the GL context must be made current.
~GPUComputeEvaluator();
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static stencil function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLSL kernel
///
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc,
dstDesc,
OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor());
if (instance) {
bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic static stencil function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the dstBuffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLSL kernel
///
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalStencils(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
stencilTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
if (instance) {
bool r = instance->EvalStencils(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
stencilTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic stencil function.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const
{
return EvalStencils(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
stencilTable->GetIndicesBuffer(),
stencilTable->GetWeightsBuffer(),
0,
0,
/* start = */ 0,
/* end = */ stencilTable->GetNumStencils());
}
/// \brief Generic stencil function.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the dstBuffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable) const
{
return EvalStencils(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
duBuffer->get_vertex_buffer(),
duDesc,
dvBuffer->get_vertex_buffer(),
dvDesc,
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
stencilTable->GetIndicesBuffer(),
stencilTable->GetWeightsBuffer(),
stencilTable->GetDuWeightsBuffer(),
stencilTable->GetDvWeightsBuffer(),
/* start = */ 0,
/* end = */ stencilTable->GetNumStencils());
}
/// \brief Dispatch the GLSL compute kernel on GPU asynchronously
/// returns false if the kernel hasn't been compiled yet.
///
/// @param srcBuffer GL buffer of input primvar source data
///
/// @param srcDesc vertex buffer descriptor for the srcBuffer
///
/// @param dstBuffer GL buffer of output primvar destination data
///
/// @param dstDesc vertex buffer descriptor for the dstBuffer
///
/// @param duBuffer GL buffer of output derivative wrt u
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer GL buffer of output derivative wrt v
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param sizesBuffer GL buffer of the sizes in the stencil table
///
/// @param offsetsBuffer GL buffer of the offsets in the stencil table
///
/// @param indicesBuffer GL buffer of the indices in the stencil table
///
/// @param weightsBuffer GL buffer of the weights in the stencil table
///
/// @param duWeightsBuffer GL buffer of the du weights in the stencil table
///
/// @param dvWeightsBuffer GL buffer of the dv weights in the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
bool EvalStencils(gpu::VertBuf *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
gpu::VertBuf *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
gpu::VertBuf *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
gpu::VertBuf *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
GPUStorageBuf *sizesBuffer,
GPUStorageBuf *offsetsBuffer,
GPUStorageBuf *indicesBuffer,
GPUStorageBuf *weightsBuffer,
GPUStorageBuf *duWeightsBuffer,
GPUStorageBuf *dvWeightsBuffer,
int start,
int end) const;
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatches(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatches(
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc,
dstDesc,
OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor());
if (instance) {
bool r = instance->EvalPatches(
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatches(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatches(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatches(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable GLPatchTable or equivalent
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatches(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
duBuffer->get_vertex_buffer(),
duDesc,
dvBuffer->get_vertex_buffer(),
dvDesc,
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
bool EvalPatches(gpu::VertBuf *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
gpu::VertBuf *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
gpu::VertBuf *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
gpu::VertBuf *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
gpu::VertBuf *patchCoordsBuffer,
const OpenSubdiv::Osd::PatchArrayVector &patchArrays,
GPUStorageBuf *patchIndexBuffer,
GPUStorageBuf *patchParamsBuffer);
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatchesVarying(
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc,
dstDesc,
OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor());
if (instance) {
bool r = instance->EvalPatchesVarying(
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetVaryingPatchArrays(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatchesVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
if (instance) {
bool r = instance->EvalPatchesVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
duBuffer->get_vertex_buffer(),
duDesc,
dvBuffer->get_vertex_buffer(),
dvDesc,
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetVaryingPatchArrays(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatchesFaceVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
numPatchCoords,
patchCoords,
patchTable,
fvarChannel);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc,
dstDesc,
OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor());
if (instance) {
bool r = instance->EvalPatchesFaceVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
numPatchCoords,
patchCoords,
patchTable,
fvarChannel);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel = 0)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
0,
OpenSubdiv::Osd::BufferDescriptor(),
0,
OpenSubdiv::Osd::BufferDescriptor(),
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetFVarPatchArrays(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatchesFaceVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable,
fvarChannel);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
if (instance) {
bool r = instance->EvalPatchesFaceVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable,
fvarChannel);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel = 0)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
duBuffer->get_vertex_buffer(),
duDesc,
dvBuffer->get_vertex_buffer(),
dvDesc,
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetFVarPatchArrays(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// Configure GLSL kernel. A valid GL context must be made current before
/// calling this function. Returns false if it fails to compile the kernel.
bool Compile(
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
OpenSubdiv::Osd::BufferDescriptor const &duDesc = OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor const &dvDesc = OpenSubdiv::Osd::BufferDescriptor());
/// Wait the dispatched kernel finishes.
static void Synchronize(void *deviceContext);
private:
struct _StencilKernel {
_StencilKernel();
~_StencilKernel();
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int workGroupSize);
GPUShader *shader = nullptr;
int uniformStart = 0;
int uniformEnd = 0;
int uniformSrcOffset = 0;
int uniformDstOffset = 0;
int uniformDuDesc = 0;
int uniformDvDesc = 0;
} _stencilKernel;
struct _PatchKernel {
_PatchKernel();
~_PatchKernel();
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int workGroupSize);
GPUShader *shader = nullptr;
int uniformSrcOffset = 0;
int uniformDstOffset = 0;
int uniformDuDesc = 0;
int uniformDvDesc = 0;
} _patchKernel;
int _workGroupSize;
GPUStorageBuf *_patchArraysSSBO = nullptr;
int GetDispatchSize(int count) const;
void DispatchCompute(GPUShader *shader, int totalDispatchSize) const;
};
} // namespace blender::opensubdiv
#endif // OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_