Files
test2/intern/opensubdiv/internal/evaluator/gpu_compute_evaluator.h
Clément Foucault 7a97105b28 GPU: Remove wrapper type for gpu::StorageBuf
This is the first step into merging DRW_gpu_wrapper.hh into
the GPU module.

This is very similar to #119825.

Pull Request: https://projects.blender.org/blender/blender/pulls/144329
2025-08-11 10:35:53 +02:00

1394 lines
59 KiB
C++

/* SPDX-FileCopyrightText: 2015 Pixar
*
* SPDX-License-Identifier: Apache-2.0 */
#ifndef OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_
#define OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_
#include <opensubdiv/osd/bufferDescriptor.h>
#include <opensubdiv/osd/types.h>
#include <opensubdiv/version.h>
#include "GPU_storage_buffer.hh"
namespace OpenSubdiv::OPENSUBDIV_VERSION::Far {
class LimitStencilTable;
class StencilTable;
} // namespace OpenSubdiv::OPENSUBDIV_VERSION::Far
// namespace OPENSUBDIV_VERSION
namespace blender::opensubdiv {
/// \brief GL stencil table (Shader Storage buffer)
///
/// This class is a GLSL SSBO representation of OpenSubdiv::Far::StencilTable.
///
/// GLSLComputeKernel consumes this table to apply stencils
///
class GPUStencilTableSSBO {
 public:
  /// Heap-allocate a table built from \a stencilTable and return the raw pointer.
  /// \a deviceContext is accepted for signature compatibility and ignored.
  /// NOTE(review): nothing in this header releases the returned object; presumably the
  /// caller is expected to `delete` it -- confirm.
  static GPUStencilTableSSBO *Create(OpenSubdiv::Far::StencilTable const *stencilTable,
                                     void *deviceContext = nullptr)
  {
    (void)deviceContext;  // unused
    return new GPUStencilTableSSBO(stencilTable);
  }

  /// Same as above, but built from a limit stencil table.
  static GPUStencilTableSSBO *Create(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable,
                                     void *deviceContext = nullptr)
  {
    (void)deviceContext;  // unused
    return new GPUStencilTableSSBO(limitStencilTable);
  }

  explicit GPUStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable);
  explicit GPUStencilTableSSBO(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable);
  // NOTE(review): the class declares a destructor and stores raw buffer pointers. If the
  // destructor frees them, copying an instance would be unsafe -- confirm and consider
  // deleting the copy operations.
  ~GPUStencilTableSSBO();

  // Interfaces needed by the compute kernel. Each accessor returns the stored pointer as-is,
  // which is nullptr for any buffer that was never assigned.
  gpu::StorageBuf *GetSizesBuffer() const
  {
    return sizes_buf;
  }
  gpu::StorageBuf *GetOffsetsBuffer() const
  {
    return offsets_buf;
  }
  gpu::StorageBuf *GetIndicesBuffer() const
  {
    return indices_buf;
  }
  gpu::StorageBuf *GetWeightsBuffer() const
  {
    return weights_buf;
  }
  gpu::StorageBuf *GetDuWeightsBuffer() const
  {
    return du_weights_buf;
  }
  gpu::StorageBuf *GetDvWeightsBuffer() const
  {
    return dv_weights_buf;
  }
  gpu::StorageBuf *GetDuuWeightsBuffer() const
  {
    return duu_weights_buf;
  }
  gpu::StorageBuf *GetDuvWeightsBuffer() const
  {
    return duv_weights_buf;
  }
  gpu::StorageBuf *GetDvvWeightsBuffer() const
  {
    return dvv_weights_buf;
  }

  /// Number of stencils recorded for this table.
  int GetNumStencils() const
  {
    return _numStencils;
  }

 private:
  // Buffers default to nullptr; which ones each constructor fills is not visible in this header.
  gpu::StorageBuf *sizes_buf = nullptr;
  gpu::StorageBuf *offsets_buf = nullptr;
  gpu::StorageBuf *indices_buf = nullptr;
  gpu::StorageBuf *weights_buf = nullptr;
  gpu::StorageBuf *du_weights_buf = nullptr;
  gpu::StorageBuf *dv_weights_buf = nullptr;
  gpu::StorageBuf *duu_weights_buf = nullptr;
  gpu::StorageBuf *duv_weights_buf = nullptr;
  gpu::StorageBuf *dvv_weights_buf = nullptr;
  // Default-initialized like the buffer pointers so GetNumStencils() never reads an
  // indeterminate value.
  int _numStencils = 0;
};
// ---------------------------------------------------------------------------
class GPUComputeEvaluator {
public:
// NOTE(review): presumably a marker alias that OpenSubdiv's mesh / evaluator-cache templates
// look for to decide that this evaluator has to be instantiated through Create() -- TODO confirm.
using Instantiatable = bool;
/**
* Blender doesn't use 2nd derivatives, but the OSD evaluator cache does expect this constructor
* to be present.
*/
static GPUComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const &duDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const & /*duuDesc*/,
                                   OpenSubdiv::Osd::BufferDescriptor const & /*duvDesc*/,
                                   OpenSubdiv::Osd::BufferDescriptor const & /*dvvDesc*/,
                                   void *deviceContext = nullptr)
{
  // The three second-derivative descriptors are dropped; everything else is handed to the
  // first-derivative overload unchanged.
  GPUComputeEvaluator *evaluator = Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext);
  return evaluator;
}
static GPUComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const &duDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
                                   void * /*deviceContext*/ = nullptr)
{
  GPUComputeEvaluator *evaluator = new GPUComputeEvaluator();
  // A falsy Compile() result means the evaluator is discarded and nullptr is returned.
  if (!evaluator->Compile(srcDesc, dstDesc, duDesc, dvDesc)) {
    delete evaluator;
    return nullptr;
  }
  return evaluator;
}
/// Constructor.
GPUComputeEvaluator();
/// Destructor. note that the GL context must be made current.
~GPUComputeEvaluator();
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static stencil function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param instance cached compiled instance. Clients are expected to
/// pre-compile an instance of this class and pass it
/// to this function. If it is null, a temporary
/// evaluator is compiled on demand for this call,
/// which may cause a performance problem.
///
/// @param deviceContext not used in the GLSL kernel
///
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc,
dstDesc,
OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor());
if (instance) {
bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic static stencil function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the dstBuffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param instance cached compiled instance. Clients are expected to
/// pre-compile an instance of this class and pass it
/// to this function. If it is null, a temporary
/// evaluator is compiled on demand for this call,
/// which may cause a performance problem.
///
/// @param deviceContext not used in the GLSL kernel
///
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalStencils(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
stencilTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
if (instance) {
bool r = instance->EvalStencils(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
stencilTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic stencil function.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(SRC_BUFFER *srcBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
                  DST_BUFFER *dstBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
                  STENCIL_TABLE const *stencilTable) const
{
  // No derivative outputs are requested here: the derivative buffers and the derivative
  // weight buffers are all null (spelled `nullptr`, not `0`, since they are pointer
  // parameters) and the derivative descriptors are default-constructed.
  return EvalStencils(srcBuffer->get_vertex_buffer(),
                      srcDesc,
                      dstBuffer->get_vertex_buffer(),
                      dstDesc,
                      /* duBuffer = */ nullptr,
                      OpenSubdiv::Osd::BufferDescriptor(),
                      /* dvBuffer = */ nullptr,
                      OpenSubdiv::Osd::BufferDescriptor(),
                      stencilTable->GetSizesBuffer(),
                      stencilTable->GetOffsetsBuffer(),
                      stencilTable->GetIndicesBuffer(),
                      stencilTable->GetWeightsBuffer(),
                      /* duWeightsBuffer = */ nullptr,
                      /* dvWeightsBuffer = */ nullptr,
                      /* start = */ 0,
                      /* end = */ stencilTable->GetNumStencils());
}
/// \brief Generic stencil function.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the dstBuffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable) const
{
return EvalStencils(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
duBuffer->get_vertex_buffer(),
duDesc,
dvBuffer->get_vertex_buffer(),
dvDesc,
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
stencilTable->GetIndicesBuffer(),
stencilTable->GetWeightsBuffer(),
stencilTable->GetDuWeightsBuffer(),
stencilTable->GetDvWeightsBuffer(),
/* start = */ 0,
/* end = */ stencilTable->GetNumStencils());
}
/// \brief Dispatch the GLSL compute kernel on GPU asynchronously.
/// Returns false if the kernel hasn't been compiled yet.
///
/// The templated EvalStencils() member overloads of this class forward to this
/// function. The overload without derivative outputs passes null for duBuffer,
/// dvBuffer, duWeightsBuffer and dvWeightsBuffer, together with
/// default-constructed duDesc and dvDesc.
///
/// @param srcBuffer `gpu::VertBuf` of input primvar source data
///
/// @param srcDesc vertex buffer descriptor for the srcBuffer
///
/// @param dstBuffer `gpu::VertBuf` of output primvar destination data
///
/// @param dstDesc vertex buffer descriptor for the dstBuffer
///
/// @param duBuffer `gpu::VertBuf` of output derivative wrt u (null when unused)
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer `gpu::VertBuf` of output derivative wrt v (null when unused)
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param sizesBuffer `gpu::StorageBuf` of the sizes in the stencil table
///
/// @param offsetsBuffer `gpu::StorageBuf` of the offsets in the stencil table
///
/// @param indicesBuffer `gpu::StorageBuf` of the indices in the stencil table
///
/// @param weightsBuffer `gpu::StorageBuf` of the weights in the stencil table
///
/// @param duWeightsBuffer `gpu::StorageBuf` of the du weights in the stencil table
/// (null when unused)
///
/// @param dvWeightsBuffer `gpu::StorageBuf` of the dv weights in the stencil table
/// (null when unused)
///
/// @param start start index of stencil table (the templated overloads pass 0)
///
/// @param end end index of stencil table (the templated overloads pass
/// `stencilTable->GetNumStencils()`)
///
bool EvalStencils(gpu::VertBuf *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
gpu::VertBuf *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
gpu::VertBuf *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
gpu::VertBuf *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
gpu::StorageBuf *sizesBuffer,
gpu::StorageBuf *offsetsBuffer,
gpu::StorageBuf *indicesBuffer,
gpu::StorageBuf *weightsBuffer,
gpu::StorageBuf *duWeightsBuffer,
gpu::StorageBuf *dvWeightsBuffer,
int start,
int end) const;
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// holding an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are expected to
/// pre-compile an instance of this class and pass it
/// to this function. If it is null, a temporary
/// evaluator is compiled on demand for this call,
/// which may cause a performance problem.
///
/// @param deviceContext not used by this evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatches(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatches(
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc,
dstDesc,
OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor());
if (instance) {
bool r = instance->EvalPatches(
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// holding an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are expected to
/// pre-compile an instance of this class and pass it
/// to this function. If it is null, a temporary
/// evaluator is compiled on demand for this call,
/// which may cause a performance problem.
///
/// @param deviceContext not used by this evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatches(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatches(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// holding an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatches(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable GLPatchTable or equivalent
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatches(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
duBuffer->get_vertex_buffer(),
duDesc,
dvBuffer->get_vertex_buffer(),
dvDesc,
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Non-templated limit evaluation entry point. The templated EvalPatches()
/// and EvalPatchesVarying() member overloads visible in this header forward here
/// after unwrapping their buffer and patch-table arguments.
///
/// @param srcBuffer `gpu::VertBuf` of input primvar source data
///
/// @param srcDesc vertex buffer descriptor for the srcBuffer
///
/// @param dstBuffer `gpu::VertBuf` of output primvar destination data
///
/// @param dstDesc vertex buffer descriptor for the dstBuffer
///
/// @param duBuffer `gpu::VertBuf` of output derivative wrt u. The overloads
/// without derivatives pass nullptr.
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer `gpu::VertBuf` of output derivative wrt v. The overloads
/// without derivatives pass nullptr.
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoordsBuffer `gpu::VertBuf` of the locations to be evaluated
/// (callers pass `patchCoords->get_vertex_buffer()`)
///
/// @param patchArrays patch arrays of the patch table (callers pass
/// `GetPatchArrays()` or `GetVaryingPatchArrays()`)
///
/// @param patchIndexBuffer patch index buffer of the patch table (callers pass
/// `GetPatchIndexBuffer()` or `GetVaryingPatchIndexBuffer()`)
///
/// @param patchParamsBuffer patch param buffer of the patch table (callers pass
/// `GetPatchParamBuffer()`)
///
/// NOTE(review): the meaning of the returned bool is not visible in this header;
/// presumably false signals failure -- confirm against the implementation.
///
bool EvalPatches(gpu::VertBuf *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
gpu::VertBuf *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
gpu::VertBuf *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
gpu::VertBuf *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
gpu::VertBuf *patchCoordsBuffer,
const OpenSubdiv::Osd::PatchArrayVector &patchArrays,
gpu::StorageBuf *patchIndexBuffer,
gpu::StorageBuf *patchParamsBuffer);
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// holding an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are expected to
/// pre-compile an instance of this class and pass it
/// to this function. If it is null, a temporary
/// evaluator is compiled on demand for this call,
/// which may cause a performance problem.
///
/// @param deviceContext not used by this evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatchesVarying(
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc,
dstDesc,
OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor());
if (instance) {
bool r = instance->EvalPatchesVarying(
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// holding an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
nullptr,
OpenSubdiv::Osd::BufferDescriptor(),
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetVaryingPatchArrays(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// holding an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are expected to
/// pre-compile an instance of this class and pass it
/// to this function. If it is null, a temporary
/// evaluator is compiled on demand for this call,
/// which may cause a performance problem.
///
/// @param deviceContext not used by this evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatchesVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
if (instance) {
bool r = instance->EvalPatchesVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// holding an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable)
{
return EvalPatches(srcBuffer->get_vertex_buffer(),
srcDesc,
dstBuffer->get_vertex_buffer(),
dstDesc,
duBuffer->get_vertex_buffer(),
duDesc,
dvBuffer->get_vertex_buffer(),
dvDesc,
numPatchCoords,
patchCoords->get_vertex_buffer(),
patchTable->GetVaryingPatchArrays(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// holding an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param instance cached compiled instance. Clients are expected to
/// pre-compile an instance of this class and pass it
/// to this function. If it is null, a temporary
/// evaluator is compiled on demand for this call,
/// which may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatchesFaceVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
numPatchCoords,
patchCoords,
patchTable,
fvarChannel);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc,
dstDesc,
OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor());
if (instance) {
bool r = instance->EvalPatchesFaceVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
numPatchCoords,
patchCoords,
patchTable,
fvarChannel);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has the same
/// signature as other device kernels so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have `get_vertex_buffer()` method returning the
/// buffer that holds an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
template<typename SRC_BUFFER,
         typename DST_BUFFER,
         typename PATCHCOORD_BUFFER,
         typename PATCH_TABLE>
bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
                            OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
                            DST_BUFFER *dstBuffer,
                            OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
                            int numPatchCoords,
                            PATCHCOORD_BUFFER *patchCoords,
                            PATCH_TABLE *patchTable,
                            int fvarChannel = 0)
{
  // Face-varying evaluation without derivative outputs: forwards to EvalPatches()
  // with the patch arrays, index buffer and patch-param buffer of the requested
  // face-varying channel.
  //
  // The two buffer slots that the derivative overload fills with the du/dv buffers
  // are left empty here. They are passed as `nullptr` rather than the integer
  // literal `0`, so the "no buffer" intent is explicit and type-safe. Their
  // descriptors are default-constructed.
  return EvalPatches(srcBuffer->get_vertex_buffer(),
                     srcDesc,
                     dstBuffer->get_vertex_buffer(),
                     dstDesc,
                     nullptr,
                     OpenSubdiv::Osd::BufferDescriptor(),
                     nullptr,
                     OpenSubdiv::Osd::BufferDescriptor(),
                     numPatchCoords,
                     patchCoords->get_vertex_buffer(),
                     patchTable->GetFVarPatchArrays(fvarChannel),
                     patchTable->GetFVarPatchIndexBuffer(fvarChannel),
                     patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has the same
/// signature as other device kernels so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have `get_vertex_buffer()` method returning the
/// buffer that holds an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// it to this function. If it's null, the evaluation is
/// still performed by instantiating a kernel on demand,
/// although this may cause a performance problem.
///
/// @param deviceContext not used by this evaluator
///
template<typename SRC_BUFFER,
typename DST_BUFFER,
typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
GPUComputeEvaluator *instance,
void *deviceContext = nullptr)
{
if (instance) {
return instance->EvalPatchesFaceVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable,
fvarChannel);
}
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
if (instance) {
bool r = instance->EvalPatchesFaceVarying(srcBuffer,
srcDesc,
dstBuffer,
dstDesc,
duBuffer,
duDesc,
dvBuffer,
dvDesc,
numPatchCoords,
patchCoords,
patchTable,
fvarChannel);
delete instance;
return r;
}
return false;
}
/// \brief Generic limit eval function. This function has the same
/// signature as other device kernels so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have `get_vertex_buffer()` method returning the
/// buffer that holds an array of PatchCoord structs.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
template<typename SRC_BUFFER,
         typename DST_BUFFER,
         typename PATCHCOORD_BUFFER,
         typename PATCH_TABLE>
bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
                            OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
                            DST_BUFFER *dstBuffer,
                            OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
                            DST_BUFFER *duBuffer,
                            OpenSubdiv::Osd::BufferDescriptor const &duDesc,
                            DST_BUFFER *dvBuffer,
                            OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
                            int numPatchCoords,
                            PATCHCOORD_BUFFER *patchCoords,
                            PATCH_TABLE *patchTable,
                            int fvarChannel = 0)
{
  // Unwrap the buffer wrappers up front so the forwarding call below stays readable.
  auto *src_vbo = srcBuffer->get_vertex_buffer();
  auto *dst_vbo = dstBuffer->get_vertex_buffer();
  auto *du_vbo = duBuffer->get_vertex_buffer();
  auto *dv_vbo = dvBuffer->get_vertex_buffer();
  auto *coords_vbo = patchCoords->get_vertex_buffer();

  // Forward to EvalPatches() with the patch arrays, index buffer and patch-param
  // buffer of the requested face-varying channel.
  return EvalPatches(src_vbo,
                     srcDesc,
                     dst_vbo,
                     dstDesc,
                     du_vbo,
                     duDesc,
                     dv_vbo,
                     dvDesc,
                     numPatchCoords,
                     coords_vbo,
                     patchTable->GetFVarPatchArrays(fvarChannel),
                     patchTable->GetFVarPatchIndexBuffer(fvarChannel),
                     patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// Configure GLSL kernel. A valid GL context must be made current before
/// calling this function. Returns false if it fails to compile the kernel.
/// NOTE(review): the "GLSL"/"GL context" wording may predate the move to the GPU module;
/// confirm what kind of context is actually required here.
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duDesc vertex buffer descriptor for the du output; defaults to a
/// default-constructed BufferDescriptor when omitted
///
/// @param dvDesc vertex buffer descriptor for the dv output; defaults to a
/// default-constructed BufferDescriptor when omitted
///
bool Compile(
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
OpenSubdiv::Osd::BufferDescriptor const &duDesc = OpenSubdiv::Osd::BufferDescriptor(),
OpenSubdiv::Osd::BufferDescriptor const &dvDesc = OpenSubdiv::Osd::BufferDescriptor());
/// Wait until the dispatched kernel finishes.
/// NOTE(review): the static eval entry points in this class ignore their `deviceContext`
/// argument; presumably it is unused here as well — confirm in the implementation.
static void Synchronize(void *deviceContext);
private:
/// State kept by the evaluator for its stencil kernel (stored in the `_stencilKernel`
/// member): a shader handle plus integer fields named after shader uniforms.
/// NOTE(review): the `uniform*` fields look like cached uniform locations — confirm
/// against the implementation file.
struct _StencilKernel {
_StencilKernel();
~_StencilKernel();
/// Takes the same four buffer descriptors as the evaluator-level Compile(), plus a
/// work-group size. Returns a bool.
/// NOTE(review): presumably builds `shader` and reports success — confirm.
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int workGroupSize);
/// Shader handle; null by default.
/// NOTE(review): ownership/release is presumably handled by the destructor — confirm.
blender::gpu::Shader *shader = nullptr;
/// Integer fields below all default to 0.
int uniformStart = 0;
int uniformEnd = 0;
int uniformSrcOffset = 0;
int uniformDstOffset = 0;
int uniformDuDesc = 0;
int uniformDvDesc = 0;
} _stencilKernel;
/// State kept by the evaluator for its patch kernel (stored in the `_patchKernel`
/// member): a shader handle plus integer fields named after shader uniforms.
/// NOTE(review): the `uniform*` fields look like cached uniform locations — confirm
/// against the implementation file.
struct _PatchKernel {
_PatchKernel();
~_PatchKernel();
/// Takes the same four buffer descriptors as the evaluator-level Compile(), plus a
/// work-group size. Returns a bool.
/// NOTE(review): presumably builds `shader` and reports success — confirm.
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
int workGroupSize);
/// Shader handle; null by default.
/// NOTE(review): ownership/release is presumably handled by the destructor — confirm.
blender::gpu::Shader *shader = nullptr;
/// Integer fields below all default to 0.
int uniformSrcOffset = 0;
int uniformDstOffset = 0;
int uniformDuDesc = 0;
int uniformDvDesc = 0;
} _patchKernel;
/// Work-group size used by this evaluator.
/// NOTE(review): unlike the neighbouring members this one has no in-class initializer;
/// presumably the constructor sets it — confirm, otherwise it is read uninitialized.
int _workGroupSize;
/// Storage buffer handle for the patch arrays (per its name); null by default.
gpu::StorageBuf *_patchArraysSSBO = nullptr;
/// NOTE(review): presumably converts an element count into a number of work groups
/// based on `_workGroupSize` — confirm in the implementation file.
int GetDispatchSize(int count) const;
/// NOTE(review): presumably dispatches `shader` over `totalDispatchSize` work groups —
/// confirm in the implementation file.
void DispatchCompute(blender::gpu::Shader *shader, int totalDispatchSize) const;
};
} // namespace blender::opensubdiv
#endif // OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_