Compositor:
Re-optimized the Defocus node. * Localized MemoryBuffers. * Removed read(x, y) calls. * Shuffled some lines in executePixel. * Added a readNoCheck function to MemoryBuffer (only use this when you are certain you are reading a pixel inside the MemoryBuffer).
This commit is contained in:
@@ -140,6 +140,13 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
inline void readNoCheck(float result[4], int x, int y) {
|
||||
const int dx = x - this->m_rect.xmin;
|
||||
const int dy = y - this->m_rect.ymin;
|
||||
const int offset = (this->m_chunkWidth * dy + dx) * COM_NUMBER_OF_CHANNELS;
|
||||
copy_v4_v4(result, &this->m_buffer[offset]);
|
||||
}
|
||||
|
||||
void writePixel(int x, int y, const float color[4]);
|
||||
void addPixel(int x, int y, const float color[4]);
|
||||
inline void readCubic(float result[4], float x, float y)
|
||||
|
||||
@@ -101,16 +101,16 @@ __kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2
|
||||
float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
|
||||
color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);
|
||||
|
||||
for (int ny = miny; ny < maxy; ny += step) {
|
||||
for (int nx = minx; nx < maxx; nx += step) {
|
||||
if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) {
|
||||
inputCoordinate.s0 = nx - offsetInput.s0;
|
||||
inputCoordinate.s1 = ny - offsetInput.s1;
|
||||
tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
|
||||
if (size > threshold && tempSize > threshold) {
|
||||
float dx = nx - realCoordinate.s0;
|
||||
float dy = ny - realCoordinate.s1;
|
||||
if (dx != 0 || dy != 0) {
|
||||
if (size > threshold) {
|
||||
for (int ny = miny; ny < maxy; ny += step) {
|
||||
inputCoordinate.s1 = ny - offsetInput.s1;
|
||||
float dy = ny - realCoordinate.s1;
|
||||
for (int nx = minx; nx < maxx; nx += step) {
|
||||
float dx = nx - realCoordinate.s0;
|
||||
if (dx != 0 || dy != 0) {
|
||||
inputCoordinate.s0 = nx - offsetInput.s0;
|
||||
tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
|
||||
if (tempSize > threshold) {
|
||||
if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) {
|
||||
float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize};
|
||||
bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);
|
||||
@@ -121,8 +121,8 @@ __kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
color = color_accum * (1.0f / multiplier_accum);
|
||||
|
||||
@@ -103,16 +103,16 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/*\n" \
|
||||
" float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
|
||||
" color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);\n" \
|
||||
"\n" \
|
||||
" for (int ny = miny; ny < maxy; ny += step) {\n" \
|
||||
" for (int nx = minx; nx < maxx; nx += step) {\n" \
|
||||
" if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) {\n" \
|
||||
" inputCoordinate.s0 = nx - offsetInput.s0;\n" \
|
||||
" inputCoordinate.s1 = ny - offsetInput.s1;\n" \
|
||||
" tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
|
||||
" if (size > threshold && tempSize > threshold) {\n" \
|
||||
" float dx = nx - realCoordinate.s0;\n" \
|
||||
" float dy = ny - realCoordinate.s1;\n" \
|
||||
" if (dx != 0 || dy != 0) {\n" \
|
||||
" if (size > threshold) {\n" \
|
||||
" for (int ny = miny; ny < maxy; ny += step) {\n" \
|
||||
" inputCoordinate.s1 = ny - offsetInput.s1;\n" \
|
||||
" float dy = ny - realCoordinate.s1;\n" \
|
||||
" for (int nx = minx; nx < maxx; nx += step) {\n" \
|
||||
" float dx = nx - realCoordinate.s0;\n" \
|
||||
" if (dx != 0 || dy != 0) {\n" \
|
||||
" inputCoordinate.s0 = nx - offsetInput.s0;\n" \
|
||||
" tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
|
||||
" if (tempSize > threshold) {\n" \
|
||||
" if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) {\n" \
|
||||
" float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize};\n" \
|
||||
" bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);\n" \
|
||||
|
||||
@@ -62,8 +62,29 @@ void VariableSizeBokehBlurOperation::initExecution()
|
||||
QualityStepHelper::initExecution(COM_QH_INCREASE);
|
||||
}
|
||||
|
||||
/**
 * Gather the tile data of the three inputs into one heap-allocated array.
 *
 * Slot 0 holds the result of m_inputProgram, slot 1 that of
 * m_inputBokehProgram and slot 2 that of m_inputSizeProgram, each obtained
 * by forwarding rect and memoryBuffers to that input's initializeTileData.
 *
 * @return the array (allocated with new[]), as an untyped pointer;
 *         deinitializeTileData releases it with delete[].
 */
void *VariableSizeBokehBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers)
{
	MemoryBuffer **tileBuffers = new MemoryBuffer *[3];
	tileBuffers[0] = static_cast<MemoryBuffer *>(this->m_inputProgram->initializeTileData(rect, memoryBuffers));
	tileBuffers[1] = static_cast<MemoryBuffer *>(this->m_inputBokehProgram->initializeTileData(rect, memoryBuffers));
	tileBuffers[2] = static_cast<MemoryBuffer *>(this->m_inputSizeProgram->initializeTileData(rect, memoryBuffers));
	return tileBuffers;
}
|
||||
|
||||
/**
 * Release the pointer array created by initializeTileData.
 *
 * Only the array itself is freed with delete[]; the MemoryBuffer objects
 * its slots point to are not deleted here. rect and memoryBuffers are unused.
 *
 * @param data the pointer previously returned by initializeTileData
 */
void VariableSizeBokehBlurOperation::deinitializeTileData(rcti *rect, MemoryBuffer **memoryBuffers, void *data)
{
	delete[] static_cast<MemoryBuffer **>(data);
}
|
||||
|
||||
void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, MemoryBuffer *inputBuffers[], void *data)
|
||||
{
|
||||
MemoryBuffer** buffers = (MemoryBuffer**)data;
|
||||
MemoryBuffer* inputProgramBuffer = buffers[0];
|
||||
MemoryBuffer* inputBokehBuffer = buffers[1];
|
||||
MemoryBuffer* inputSizeBuffer = buffers[2];
|
||||
float* inputSizeFloatBuffer = inputSizeBuffer->getBuffer();
|
||||
float* inputProgramFloatBuffer = inputProgramBuffer->getBuffer();
|
||||
float readColor[4];
|
||||
float bokeh[4];
|
||||
float tempSize[4];
|
||||
@@ -84,32 +105,37 @@ void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, Me
|
||||
int maxy = MIN2(y + this->m_maxBlur, m_height);
|
||||
#endif
|
||||
{
|
||||
this->m_inputSizeProgram->read(tempSize, x, y, COM_PS_NEAREST, inputBuffers);
|
||||
this->m_inputProgram->read(readColor, x, y, COM_PS_NEAREST, inputBuffers);
|
||||
inputSizeBuffer->readNoCheck(tempSize, x, y);
|
||||
inputProgramBuffer->readNoCheck(readColor, x, y);
|
||||
|
||||
add_v4_v4(color_accum, readColor);
|
||||
add_v4_fl(multiplier_accum, 1.0f);
|
||||
float sizeCenter = tempSize[0];
|
||||
|
||||
for (int ny = miny; ny < maxy; ny += QualityStepHelper::getStep()) {
|
||||
for (int nx = minx; nx < maxx; nx += QualityStepHelper::getStep()) {
|
||||
if (nx >= 0 && nx < this->getWidth() && ny >= 0 && ny < getHeight()) {
|
||||
this->m_inputSizeProgram->read(tempSize, nx, ny, COM_PS_NEAREST, inputBuffers);
|
||||
float size = tempSize[0];
|
||||
float fsize = fabsf(size);
|
||||
if (sizeCenter > this->m_threshold && size > this->m_threshold) {
|
||||
float dx = nx - x;
|
||||
float dy = ny - y;
|
||||
if (nx == x && ny == y) {
|
||||
}
|
||||
else if (fsize > fabsf(dx) && fsize > fabsf(dy)) {
|
||||
float u = (256 + (dx/size) * 256);
|
||||
float v = (256 + (dy/size) * 256);
|
||||
this->m_inputBokehProgram->read(bokeh, u, v, COM_PS_NEAREST, inputBuffers);
|
||||
this->m_inputProgram->read(readColor, nx, ny, COM_PS_NEAREST, inputBuffers);
|
||||
madd_v4_v4v4(color_accum, bokeh, readColor);
|
||||
add_v4_v4(multiplier_accum, bokeh);
|
||||
const int addXStep = QualityStepHelper::getStep()*COM_NUMBER_OF_CHANNELS;
|
||||
|
||||
if (sizeCenter > this->m_threshold) {
|
||||
for (int ny = miny; ny < maxy; ny += QualityStepHelper::getStep()) {
|
||||
float dy = ny - y;
|
||||
int offsetNy = ny * inputSizeBuffer->getWidth() * COM_NUMBER_OF_CHANNELS;
|
||||
int offsetNxNy = offsetNy + (minx*COM_NUMBER_OF_CHANNELS);
|
||||
for (int nx = minx; nx < maxx; nx += QualityStepHelper::getStep()) {
|
||||
if (nx != x || ny != y)
|
||||
{
|
||||
float size = inputSizeFloatBuffer[offsetNxNy];
|
||||
if (size > this->m_threshold) {
|
||||
float fsize = fabsf(size);
|
||||
float dx = nx - x;
|
||||
if (fsize > fabsf(dx) && fsize > fabsf(dy)) {
|
||||
float u = (256.0f + (dx/size) * 256.0f);
|
||||
float v = (256.0f + (dy/size) * 256.0f);
|
||||
inputBokehBuffer->readNoCheck(bokeh, u, v);
|
||||
madd_v4_v4v4(color_accum, bokeh, &inputProgramFloatBuffer[offsetNxNy]);
|
||||
add_v4_v4(multiplier_accum, bokeh);
|
||||
}
|
||||
}
|
||||
}
|
||||
offsetNxNy += addXStep;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,6 +50,10 @@ public:
|
||||
*/
|
||||
void initExecution();
|
||||
|
||||
void *initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers);
|
||||
|
||||
void deinitializeTileData(rcti *rect, MemoryBuffer **memoryBuffers, void *data);
|
||||
|
||||
/**
|
||||
* Deinitialize the execution
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user