This is the first step into merging DRW_gpu_wrapper.hh into the GPU module. This is very similar to #119825. Pull Request: https://projects.blender.org/blender/blender/pulls/144329
1394 lines
59 KiB
C++
1394 lines
59 KiB
C++
/* SPDX-FileCopyrightText: 2015 Pixar
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
|
|
|
#ifndef OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_
|
|
#define OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_
|
|
|
|
#include <opensubdiv/osd/bufferDescriptor.h>
|
|
#include <opensubdiv/osd/types.h>
|
|
#include <opensubdiv/version.h>
|
|
|
|
#include "GPU_storage_buffer.hh"
|
|
|
|
namespace OpenSubdiv::OPENSUBDIV_VERSION::Far {
|
|
class LimitStencilTable;
|
|
class StencilTable;
|
|
} // namespace OpenSubdiv::OPENSUBDIV_VERSION::Far
|
|
// namespace OPENSUBDIV_VERSION
|
|
|
|
namespace blender::opensubdiv {
|
|
|
|
/// \brief GL stencil table (Shader Storage buffer)
|
|
///
|
|
/// This class is a GLSL SSBO representation of OpenSubdiv::Far::StencilTable.
|
|
///
|
|
/// GLSLComputeKernel consumes this table to apply stencils
|
|
///
|
|
class GPUStencilTableSSBO {
|
|
public:
|
|
static GPUStencilTableSSBO *Create(OpenSubdiv::Far::StencilTable const *stencilTable,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
(void)deviceContext; // unused
|
|
return new GPUStencilTableSSBO(stencilTable);
|
|
}
|
|
static GPUStencilTableSSBO *Create(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
(void)deviceContext; // unused
|
|
return new GPUStencilTableSSBO(limitStencilTable);
|
|
}
|
|
|
|
explicit GPUStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable);
|
|
explicit GPUStencilTableSSBO(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable);
|
|
~GPUStencilTableSSBO();
|
|
|
|
// interfaces needed for GLSLComputeKernel
|
|
gpu::StorageBuf *GetSizesBuffer() const
|
|
{
|
|
return sizes_buf;
|
|
}
|
|
gpu::StorageBuf *GetOffsetsBuffer() const
|
|
{
|
|
return offsets_buf;
|
|
}
|
|
gpu::StorageBuf *GetIndicesBuffer() const
|
|
{
|
|
return indices_buf;
|
|
}
|
|
gpu::StorageBuf *GetWeightsBuffer() const
|
|
{
|
|
return weights_buf;
|
|
}
|
|
gpu::StorageBuf *GetDuWeightsBuffer() const
|
|
{
|
|
return du_weights_buf;
|
|
}
|
|
gpu::StorageBuf *GetDvWeightsBuffer() const
|
|
{
|
|
return dv_weights_buf;
|
|
}
|
|
gpu::StorageBuf *GetDuuWeightsBuffer() const
|
|
{
|
|
return duu_weights_buf;
|
|
}
|
|
gpu::StorageBuf *GetDuvWeightsBuffer() const
|
|
{
|
|
return duv_weights_buf;
|
|
}
|
|
gpu::StorageBuf *GetDvvWeightsBuffer() const
|
|
{
|
|
return dvv_weights_buf;
|
|
}
|
|
int GetNumStencils() const
|
|
{
|
|
return _numStencils;
|
|
}
|
|
|
|
private:
|
|
gpu::StorageBuf *sizes_buf = nullptr;
|
|
gpu::StorageBuf *offsets_buf = nullptr;
|
|
gpu::StorageBuf *indices_buf = nullptr;
|
|
gpu::StorageBuf *weights_buf = nullptr;
|
|
gpu::StorageBuf *du_weights_buf = nullptr;
|
|
gpu::StorageBuf *dv_weights_buf = nullptr;
|
|
gpu::StorageBuf *duu_weights_buf = nullptr;
|
|
gpu::StorageBuf *duv_weights_buf = nullptr;
|
|
gpu::StorageBuf *dvv_weights_buf = nullptr;
|
|
int _numStencils;
|
|
};
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
class GPUComputeEvaluator {
|
|
public:
|
|
using Instantiatable = bool;
|
|
/**
|
|
* Blender doesn't use 2nd derivatives, but the OSD evaluator cache does expect this constructor
|
|
* to be present.
|
|
*/
|
|
static GPUComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const & /*duuDesc*/,
|
|
OpenSubdiv::Osd::BufferDescriptor const & /*duvDesc*/,
|
|
OpenSubdiv::Osd::BufferDescriptor const & /*dvvDesc*/,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
return Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext);
|
|
}
|
|
|
|
/// Allocates an evaluator and compiles it for the given descriptors.
/// Returns the new instance when `Compile()` succeeds; otherwise the instance is deleted
/// and null is returned. The device context argument is ignored.
static GPUComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const &duDesc,
                                   OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
                                   void * /*deviceContext*/ = nullptr)
{
  GPUComputeEvaluator *evaluator = new GPUComputeEvaluator();
  if (!evaluator->Compile(srcDesc, dstDesc, duDesc, dvDesc)) {
    /* Compilation failed: nothing usable to hand back. */
    delete evaluator;
    return nullptr;
  }
  return evaluator;
}
|
|
|
|
/// Constructor.
|
|
GPUComputeEvaluator();
|
|
|
|
/// Destructor. Note that the GL context must be made current.
|
|
~GPUComputeEvaluator();
|
|
|
|
/// ----------------------------------------------------------------------
|
|
///
|
|
/// Stencil evaluations with StencilTable
|
|
///
|
|
/// ----------------------------------------------------------------------
|
|
|
|
/// \brief Generic static stencil function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// transparently from OsdMesh template interface.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param stencilTable stencil table to be applied. The table must have
|
|
/// SSBO interfaces.
|
|
///
|
|
/// @param instance cached compiled instance. Clients are supposed to
|
|
/// pre-compile an instance of this class and provide
|
|
/// to this function. If it's null the kernel still
|
|
/// compute by instantiating on-demand kernel although
|
|
/// it may cause a performance problem.
|
|
///
|
|
/// @param deviceContext not used in the GLSL kernel
|
|
///
|
|
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
|
static bool EvalStencils(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
STENCIL_TABLE const *stencilTable,
|
|
GPUComputeEvaluator *instance,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
|
|
if (instance) {
|
|
return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable);
|
|
}
|
|
|
|
// Create an instance on demand (slow)
|
|
(void)deviceContext; // unused
|
|
instance = Create(srcDesc,
|
|
dstDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
OpenSubdiv::Osd::BufferDescriptor());
|
|
if (instance) {
|
|
bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable);
|
|
delete instance;
|
|
return r;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// \brief Generic static stencil function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// transparently from OsdMesh template interface.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the dstBuffer
|
|
///
|
|
/// @param duBuffer Output buffer derivative wrt u
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
///
|
|
/// @param dvBuffer Output buffer derivative wrt v
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
///
|
|
/// @param stencilTable stencil table to be applied. The table must have
|
|
/// SSBO interfaces.
|
|
///
|
|
/// @param instance cached compiled instance. Clients are supposed to
|
|
/// pre-compile an instance of this class and provide
|
|
/// to this function. If it's null the kernel still
|
|
/// compute by instantiating on-demand kernel although
|
|
/// it may cause a performance problem.
|
|
///
|
|
/// @param deviceContext not used in the GLSL kernel
|
|
///
|
|
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
|
static bool EvalStencils(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
DST_BUFFER *duBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
DST_BUFFER *dvBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
STENCIL_TABLE const *stencilTable,
|
|
GPUComputeEvaluator *instance,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
|
|
if (instance) {
|
|
return instance->EvalStencils(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
duBuffer,
|
|
duDesc,
|
|
dvBuffer,
|
|
dvDesc,
|
|
stencilTable);
|
|
}
|
|
|
|
// Create an instance on demand (slow)
|
|
(void)deviceContext; // unused
|
|
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
|
|
if (instance) {
|
|
bool r = instance->EvalStencils(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
duBuffer,
|
|
duDesc,
|
|
dvBuffer,
|
|
dvDesc,
|
|
stencilTable);
|
|
delete instance;
|
|
return r;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// \brief Generic stencil function.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param stencilTable stencil table to be applied. The table must have
|
|
/// SSBO interfaces.
|
|
///
|
|
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
|
bool EvalStencils(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
STENCIL_TABLE const *stencilTable) const
|
|
{
|
|
return EvalStencils(srcBuffer->get_vertex_buffer(),
|
|
srcDesc,
|
|
dstBuffer->get_vertex_buffer(),
|
|
dstDesc,
|
|
nullptr,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
nullptr,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
stencilTable->GetSizesBuffer(),
|
|
stencilTable->GetOffsetsBuffer(),
|
|
stencilTable->GetIndicesBuffer(),
|
|
stencilTable->GetWeightsBuffer(),
|
|
0,
|
|
0,
|
|
/* start = */ 0,
|
|
/* end = */ stencilTable->GetNumStencils());
|
|
}
|
|
|
|
/// \brief Generic stencil function.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the dstBuffer
|
|
///
|
|
/// @param duBuffer Output buffer derivative wrt u
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
///
|
|
/// @param dvBuffer Output buffer derivative wrt v
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
///
|
|
/// @param stencilTable stencil table to be applied. The table must have
|
|
/// SSBO interfaces.
|
|
///
|
|
template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
|
bool EvalStencils(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
DST_BUFFER *duBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
DST_BUFFER *dvBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
STENCIL_TABLE const *stencilTable) const
|
|
{
|
|
return EvalStencils(srcBuffer->get_vertex_buffer(),
|
|
srcDesc,
|
|
dstBuffer->get_vertex_buffer(),
|
|
dstDesc,
|
|
duBuffer->get_vertex_buffer(),
|
|
duDesc,
|
|
dvBuffer->get_vertex_buffer(),
|
|
dvDesc,
|
|
stencilTable->GetSizesBuffer(),
|
|
stencilTable->GetOffsetsBuffer(),
|
|
stencilTable->GetIndicesBuffer(),
|
|
stencilTable->GetWeightsBuffer(),
|
|
stencilTable->GetDuWeightsBuffer(),
|
|
stencilTable->GetDvWeightsBuffer(),
|
|
/* start = */ 0,
|
|
/* end = */ stencilTable->GetNumStencils());
|
|
}
|
|
|
|
/// \brief Dispatch the GLSL compute kernel on GPU asynchronously
|
|
/// returns false if the kernel hasn't been compiled yet.
|
|
///
|
|
/// @param srcBuffer GL buffer of input primvar source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the srcBuffer
|
|
///
|
|
/// @param dstBuffer GL buffer of output primvar destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the dstBuffer
|
|
///
|
|
/// @param duBuffer GL buffer of output derivative wrt u
|
|
///
|
|
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
///
|
|
/// @param dvBuffer GL buffer of output derivative wrt v
|
|
///
|
|
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
///
|
|
/// @param sizesBuffer GL buffer of the sizes in the stencil table
|
|
///
|
|
/// @param offsetsBuffer GL buffer of the offsets in the stencil table
|
|
///
|
|
/// @param indicesBuffer GL buffer of the indices in the stencil table
|
|
///
|
|
/// @param weightsBuffer GL buffer of the weights in the stencil table
|
|
///
|
|
/// @param duWeightsBuffer GL buffer of the du weights in the stencil table
|
|
///
|
|
/// @param dvWeightsBuffer GL buffer of the dv weights in the stencil table
|
|
///
|
|
/// @param start start index of stencil table
|
|
///
|
|
/// @param end end index of stencil table
|
|
///
|
|
bool EvalStencils(gpu::VertBuf *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
gpu::VertBuf *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
gpu::VertBuf *duBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
gpu::VertBuf *dvBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
gpu::StorageBuf *sizesBuffer,
|
|
gpu::StorageBuf *offsetsBuffer,
|
|
gpu::StorageBuf *indicesBuffer,
|
|
gpu::StorageBuf *weightsBuffer,
|
|
gpu::StorageBuf *duWeightsBuffer,
|
|
gpu::StorageBuf *dvWeightsBuffer,
|
|
int start,
|
|
int end) const;
|
|
|
|
/// ----------------------------------------------------------------------
|
|
///
|
|
/// Limit evaluations with PatchTable
|
|
///
|
|
/// ----------------------------------------------------------------------
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
/// @param instance cached compiled instance. Clients are supposed to
|
|
/// pre-compile an instance of this class and provide
|
|
/// to this function. If it's null the kernel still
|
|
/// compute by instantiating on-demand kernel although
|
|
/// it may cause a performance problem.
|
|
///
|
|
/// @param deviceContext not used in the GLXFB evaluator
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
static bool EvalPatches(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable,
|
|
GPUComputeEvaluator *instance,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
|
|
if (instance) {
|
|
return instance->EvalPatches(
|
|
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
|
|
}
|
|
// Create an instance on demand (slow)
|
|
(void)deviceContext; // unused
|
|
instance = Create(srcDesc,
|
|
dstDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
OpenSubdiv::Osd::BufferDescriptor());
|
|
if (instance) {
|
|
bool r = instance->EvalPatches(
|
|
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
|
|
delete instance;
|
|
return r;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param duBuffer Output buffer derivative wrt u
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
///
|
|
/// @param dvBuffer Output buffer derivative wrt v
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
/// @param instance cached compiled instance. Clients are supposed to
|
|
/// pre-compile an instance of this class and provide
|
|
/// to this function. If it's null the kernel still
|
|
/// compute by instantiating on-demand kernel although
|
|
/// it may cause a performance problem.
|
|
///
|
|
/// @param deviceContext not used in the GLXFB evaluator
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
static bool EvalPatches(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
DST_BUFFER *duBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
DST_BUFFER *dvBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable,
|
|
GPUComputeEvaluator *instance,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
if (instance) {
|
|
return instance->EvalPatches(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
duBuffer,
|
|
duDesc,
|
|
dvBuffer,
|
|
dvDesc,
|
|
numPatchCoords,
|
|
patchCoords,
|
|
patchTable);
|
|
}
|
|
|
|
// Create an instance on demand (slow)
|
|
(void)deviceContext; // unused
|
|
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
|
|
if (instance) {
|
|
bool r = instance->EvalPatches(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
duBuffer,
|
|
duDesc,
|
|
dvBuffer,
|
|
dvDesc,
|
|
numPatchCoords,
|
|
patchCoords,
|
|
patchTable);
|
|
delete instance;
|
|
return r;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
bool EvalPatches(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable)
|
|
{
|
|
|
|
return EvalPatches(srcBuffer->get_vertex_buffer(),
|
|
srcDesc,
|
|
dstBuffer->get_vertex_buffer(),
|
|
dstDesc,
|
|
nullptr,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
nullptr,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
numPatchCoords,
|
|
patchCoords->get_vertex_buffer(),
|
|
patchTable->GetPatchArrays(),
|
|
patchTable->GetPatchIndexBuffer(),
|
|
patchTable->GetPatchParamBuffer());
|
|
}
|
|
|
|
/// \brief Generic limit eval function with derivatives. This function has
|
|
/// a same signature as other device kernels have so that it can be
|
|
/// called in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param duBuffer Output buffer derivative wrt u
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
///
|
|
/// @param dvBuffer Output buffer derivative wrt v
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
bool EvalPatches(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
DST_BUFFER *duBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
DST_BUFFER *dvBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable)
|
|
{
|
|
|
|
return EvalPatches(srcBuffer->get_vertex_buffer(),
|
|
srcDesc,
|
|
dstBuffer->get_vertex_buffer(),
|
|
dstDesc,
|
|
duBuffer->get_vertex_buffer(),
|
|
duDesc,
|
|
dvBuffer->get_vertex_buffer(),
|
|
dvDesc,
|
|
numPatchCoords,
|
|
patchCoords->get_vertex_buffer(),
|
|
patchTable->GetPatchArrays(),
|
|
patchTable->GetPatchIndexBuffer(),
|
|
patchTable->GetPatchParamBuffer());
|
|
}
|
|
|
|
/// \brief Non-template limit evaluation entry point (defined out of line).
///
/// The templated `EvalPatches` and `EvalPatchesVarying` members unwrap their
/// arguments and forward to this overload.
///
/// @param srcBuffer          Buffer of input primvar source data.
/// @param srcDesc            Vertex buffer descriptor for the srcBuffer.
/// @param dstBuffer          Buffer of output primvar destination data.
/// @param dstDesc            Vertex buffer descriptor for the dstBuffer.
/// @param duBuffer           Buffer of output derivative wrt u (callers may pass null).
/// @param duDesc             Vertex buffer descriptor for the duBuffer.
/// @param dvBuffer           Buffer of output derivative wrt v (callers may pass null).
/// @param dvDesc             Vertex buffer descriptor for the dvBuffer.
/// @param numPatchCoords     Number of patch coordinates to evaluate.
/// @param patchCoordsBuffer  Buffer holding the patch coordinates.
/// @param patchArrays        Patch arrays of the patch table.
/// @param patchIndexBuffer   Buffer of the patch table's patch indices.
/// @param patchParamsBuffer  Buffer of the patch table's patch params.
///
/// NOTE(review): the meaning of the return value is defined by the implementation;
/// presumably false on failure — confirm.
///
bool EvalPatches(gpu::VertBuf *srcBuffer,
                 OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
                 gpu::VertBuf *dstBuffer,
                 OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
                 gpu::VertBuf *duBuffer,
                 OpenSubdiv::Osd::BufferDescriptor const &duDesc,
                 gpu::VertBuf *dvBuffer,
                 OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
                 int numPatchCoords,
                 gpu::VertBuf *patchCoordsBuffer,
                 const OpenSubdiv::Osd::PatchArrayVector &patchArrays,
                 gpu::StorageBuf *patchIndexBuffer,
                 gpu::StorageBuf *patchParamsBuffer);
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
/// @param instance cached compiled instance. Clients are supposed to
|
|
/// pre-compile an instance of this class and provide
|
|
/// to this function. If it's null the kernel still
|
|
/// compute by instantiating on-demand kernel although
|
|
/// it may cause a performance problem.
|
|
///
|
|
/// @param deviceContext not used in the GLXFB evaluator
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable,
|
|
GPUComputeEvaluator *instance,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
if (instance) {
|
|
return instance->EvalPatchesVarying(
|
|
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
|
|
}
|
|
|
|
// Create an instance on demand (slow)
|
|
(void)deviceContext; // unused
|
|
instance = Create(srcDesc,
|
|
dstDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
OpenSubdiv::Osd::BufferDescriptor());
|
|
if (instance) {
|
|
bool r = instance->EvalPatchesVarying(
|
|
srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable);
|
|
delete instance;
|
|
return r;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable)
|
|
{
|
|
|
|
return EvalPatches(srcBuffer->get_vertex_buffer(),
|
|
srcDesc,
|
|
dstBuffer->get_vertex_buffer(),
|
|
dstDesc,
|
|
nullptr,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
nullptr,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
numPatchCoords,
|
|
patchCoords->get_vertex_buffer(),
|
|
patchTable->GetVaryingPatchArrays(),
|
|
patchTable->GetVaryingPatchIndexBuffer(),
|
|
patchTable->GetPatchParamBuffer());
|
|
}
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param duBuffer Output buffer derivative wrt u
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
///
|
|
/// @param dvBuffer Output buffer derivative wrt v
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
/// @param instance cached compiled instance. Clients are supposed to
|
|
/// pre-compile an instance of this class and provide
|
|
/// to this function. If it's null the kernel still
|
|
/// compute by instantiating on-demand kernel although
|
|
/// it may cause a performance problem.
|
|
///
|
|
/// @param deviceContext not used in the GLXFB evaluator
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
DST_BUFFER *duBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
DST_BUFFER *dvBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable,
|
|
GPUComputeEvaluator *instance,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
if (instance) {
|
|
return instance->EvalPatchesVarying(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
duBuffer,
|
|
duDesc,
|
|
dvBuffer,
|
|
dvDesc,
|
|
numPatchCoords,
|
|
patchCoords,
|
|
patchTable);
|
|
}
|
|
|
|
// Create an instance on demand (slow)
|
|
(void)deviceContext; // unused
|
|
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
|
|
if (instance) {
|
|
bool r = instance->EvalPatchesVarying(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
duBuffer,
|
|
duDesc,
|
|
dvBuffer,
|
|
dvDesc,
|
|
numPatchCoords,
|
|
patchCoords,
|
|
patchTable);
|
|
delete instance;
|
|
return r;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param duBuffer Output buffer derivative wrt u
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
///
|
|
/// @param dvBuffer Output buffer derivative wrt v
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
bool EvalPatchesVarying(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
DST_BUFFER *duBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
DST_BUFFER *dvBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable)
|
|
{
|
|
|
|
return EvalPatches(srcBuffer->get_vertex_buffer(),
|
|
srcDesc,
|
|
dstBuffer->get_vertex_buffer(),
|
|
dstDesc,
|
|
duBuffer->get_vertex_buffer(),
|
|
duDesc,
|
|
dvBuffer->get_vertex_buffer(),
|
|
dvDesc,
|
|
numPatchCoords,
|
|
patchCoords->get_vertex_buffer(),
|
|
patchTable->GetVaryingPatchArrays(),
|
|
patchTable->GetVaryingPatchIndexBuffer(),
|
|
patchTable->GetPatchParamBuffer());
|
|
}
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
/// @param fvarChannel face-varying channel
|
|
///
|
|
/// @param instance cached compiled instance. Clients are supposed to
|
|
/// pre-compile an instance of this class and provide
|
|
/// to this function. If it's null the kernel still
|
|
/// compute by instantiating on-demand kernel although
|
|
/// it may cause a performance problem.
|
|
///
|
|
/// @param deviceContext not used in the GLXFB evaluator
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable,
|
|
int fvarChannel,
|
|
GPUComputeEvaluator *instance,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
if (instance) {
|
|
return instance->EvalPatchesFaceVarying(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
numPatchCoords,
|
|
patchCoords,
|
|
patchTable,
|
|
fvarChannel);
|
|
}
|
|
|
|
// Create an instance on demand (slow)
|
|
(void)deviceContext; // unused
|
|
instance = Create(srcDesc,
|
|
dstDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
OpenSubdiv::Osd::BufferDescriptor());
|
|
if (instance) {
|
|
bool r = instance->EvalPatchesFaceVarying(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
numPatchCoords,
|
|
patchCoords,
|
|
patchTable,
|
|
fvarChannel);
|
|
delete instance;
|
|
return r;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
/// @param fvarChannel face-varying channel
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable,
|
|
int fvarChannel = 0)
|
|
{
|
|
|
|
return EvalPatches(srcBuffer->get_vertex_buffer(),
|
|
srcDesc,
|
|
dstBuffer->get_vertex_buffer(),
|
|
dstDesc,
|
|
0,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
0,
|
|
OpenSubdiv::Osd::BufferDescriptor(),
|
|
numPatchCoords,
|
|
patchCoords->get_vertex_buffer(),
|
|
patchTable->GetFVarPatchArrays(fvarChannel),
|
|
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
|
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
|
}
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param duBuffer Output buffer derivative wrt u
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
///
|
|
/// @param dvBuffer Output buffer derivative wrt v
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
/// @param fvarChannel face-varying channel
|
|
///
|
|
/// @param instance cached compiled instance. Clients are supposed to
|
|
/// pre-compile an instance of this class and provide
|
|
/// to this function. If it's null the kernel still
|
|
/// compute by instantiating on-demand kernel although
|
|
/// it may cause a performance problem.
|
|
///
|
|
/// @param deviceContext not used in the GLXFB evaluator
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
DST_BUFFER *duBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
DST_BUFFER *dvBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable,
|
|
int fvarChannel,
|
|
GPUComputeEvaluator *instance,
|
|
void *deviceContext = nullptr)
|
|
{
|
|
if (instance) {
|
|
return instance->EvalPatchesFaceVarying(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
duBuffer,
|
|
duDesc,
|
|
dvBuffer,
|
|
dvDesc,
|
|
numPatchCoords,
|
|
patchCoords,
|
|
patchTable,
|
|
fvarChannel);
|
|
}
|
|
|
|
// Create an instance on demand (slow)
|
|
(void)deviceContext; // unused
|
|
instance = Create(srcDesc, dstDesc, duDesc, dvDesc);
|
|
if (instance) {
|
|
bool r = instance->EvalPatchesFaceVarying(srcBuffer,
|
|
srcDesc,
|
|
dstBuffer,
|
|
dstDesc,
|
|
duBuffer,
|
|
duDesc,
|
|
dvBuffer,
|
|
dvDesc,
|
|
numPatchCoords,
|
|
patchCoords,
|
|
patchTable,
|
|
fvarChannel);
|
|
delete instance;
|
|
return r;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// \brief Generic limit eval function. This function has a same
|
|
/// signature as other device kernels have so that it can be called
|
|
/// in the same way.
|
|
///
|
|
/// @param srcBuffer Input primvar buffer.
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of source data
|
|
///
|
|
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
///
|
|
/// @param dstBuffer Output primvar buffer
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
///
|
|
/// @param duBuffer Output buffer derivative wrt u
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// object of destination data
|
|
///
|
|
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
///
|
|
/// @param dvBuffer Output buffer derivative wrt v
|
|
/// Must have `get_vertex_buffer()` returning a `gpu::VertBuf`
|
|
/// buffer object of destination data
|
|
///
|
|
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
///
|
|
/// @param numPatchCoords number of patchCoords.
|
|
///
|
|
/// @param patchCoords array of locations to be evaluated.
|
|
/// must have BindVBO() method returning an
|
|
/// array of PatchCoord struct in VBO.
|
|
///
|
|
/// @param patchTable GLPatchTable or equivalent
|
|
///
|
|
/// @param fvarChannel face-varying channel
|
|
///
|
|
template<typename SRC_BUFFER,
|
|
typename DST_BUFFER,
|
|
typename PATCHCOORD_BUFFER,
|
|
typename PATCH_TABLE>
|
|
bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
DST_BUFFER *dstBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
DST_BUFFER *duBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
DST_BUFFER *dvBuffer,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
int numPatchCoords,
|
|
PATCHCOORD_BUFFER *patchCoords,
|
|
PATCH_TABLE *patchTable,
|
|
int fvarChannel = 0)
|
|
{
|
|
|
|
return EvalPatches(srcBuffer->get_vertex_buffer(),
|
|
srcDesc,
|
|
dstBuffer->get_vertex_buffer(),
|
|
dstDesc,
|
|
duBuffer->get_vertex_buffer(),
|
|
duDesc,
|
|
dvBuffer->get_vertex_buffer(),
|
|
dvDesc,
|
|
numPatchCoords,
|
|
patchCoords->get_vertex_buffer(),
|
|
patchTable->GetFVarPatchArrays(fvarChannel),
|
|
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
|
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
|
}
|
|
|
|
/// ----------------------------------------------------------------------
|
|
///
|
|
/// Other methods
|
|
///
|
|
/// ----------------------------------------------------------------------
|
|
|
|
/// Configure GLSL kernel. A valid GL context must be made current before
|
|
/// calling this function. Returns false if it fails to compile the kernel.
|
|
bool Compile(
|
|
OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc = OpenSubdiv::Osd::BufferDescriptor(),
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc = OpenSubdiv::Osd::BufferDescriptor());
|
|
|
|
/// Wait until the dispatched kernel finishes.
|
|
/// NOTE(review): `deviceContext` is presumably ignored here, as it is in the
/// static Eval* entry points of this class; confirm against the definition.
static void Synchronize(void *deviceContext);
|
|
|
|
private:
|
|
struct _StencilKernel {
|
|
_StencilKernel();
|
|
~_StencilKernel();
|
|
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
int workGroupSize);
|
|
blender::gpu::Shader *shader = nullptr;
|
|
int uniformStart = 0;
|
|
int uniformEnd = 0;
|
|
int uniformSrcOffset = 0;
|
|
int uniformDstOffset = 0;
|
|
int uniformDuDesc = 0;
|
|
int uniformDvDesc = 0;
|
|
} _stencilKernel;
|
|
|
|
struct _PatchKernel {
|
|
_PatchKernel();
|
|
~_PatchKernel();
|
|
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &duDesc,
|
|
OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
|
|
int workGroupSize);
|
|
blender::gpu::Shader *shader = nullptr;
|
|
int uniformSrcOffset = 0;
|
|
int uniformDstOffset = 0;
|
|
int uniformDuDesc = 0;
|
|
int uniformDvDesc = 0;
|
|
} _patchKernel;
|
|
|
|
/// Work group size of the evaluator.
/// NOTE(review): there is no in-class initializer, so this presumably relies on
/// the constructor to set it; confirm it is initialized before first use.
int _workGroupSize;
|
|
/// Storage buffer handle, null by default.
/// NOTE(review): presumably holds the patch arrays uploaded for EvalPatches();
/// ownership and release are not visible here, so confirm them in the implementation.
gpu::StorageBuf *_patchArraysSSBO = nullptr;
|
|
|
|
/// NOTE(review): presumably returns the number of work groups needed to cover
/// `count` items given `_workGroupSize`; confirm against the definition.
int GetDispatchSize(int count) const;
|
|
|
|
/// NOTE(review): presumably dispatches `shader` as a compute job covering
/// `totalDispatchSize` work groups; confirm against the definition.
void DispatchCompute(blender::gpu::Shader *shader, int totalDispatchSize) const;
|
|
};
|
|
|
|
} // namespace blender::opensubdiv
|
|
|
|
#endif // OPENSUBDIV_GPU_COMPUTE_EVALUATOR_H_
|