Compositor:

re-optimized the Defocus node.
 * localized MemoryBuffers
 * removed read(x,y) calls
 * shuffled some lines in the execute pixel
 * added a readNoCheck function to the memorybuffer (only use this when
you are certain you are reading a pixel inside the memorybuffer).
This commit is contained in:
Jeroen Bakker
2012-07-11 20:51:00 +00:00
parent 83d2314edf
commit 4fb850c72e
5 changed files with 79 additions and 42 deletions

View File

@@ -140,6 +140,13 @@ public:
}
}
/**
 * Copy the four channels of the pixel at (x, y) into result.
 *
 * The coordinates are made relative to m_rect.xmin / m_rect.ymin and used
 * directly as an index into m_buffer. No range check is performed, so only
 * call this when (x, y) is certain to lie inside this buffer.
 *
 * \param result: receives the four floats of the pixel
 * \param x: x coordinate, in the same space as m_rect
 * \param y: y coordinate, in the same space as m_rect
 */
inline void readNoCheck(float result[4], int x, int y) {
	const int column = x - this->m_rect.xmin;
	const int row = y - this->m_rect.ymin;
	/* rows are m_chunkWidth pixels wide, each pixel holds COM_NUMBER_OF_CHANNELS floats */
	const int pixelStart = (this->m_chunkWidth * row + column) * COM_NUMBER_OF_CHANNELS;
	copy_v4_v4(result, &this->m_buffer[pixelStart]);
}
void writePixel(int x, int y, const float color[4]);
void addPixel(int x, int y, const float color[4]);
inline void readCubic(float result[4], float x, float y)

View File

@@ -101,16 +101,16 @@ __kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2
float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);
for (int ny = miny; ny < maxy; ny += step) {
for (int nx = minx; nx < maxx; nx += step) {
if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) {
inputCoordinate.s0 = nx - offsetInput.s0;
inputCoordinate.s1 = ny - offsetInput.s1;
tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
if (size > threshold && tempSize > threshold) {
float dx = nx - realCoordinate.s0;
float dy = ny - realCoordinate.s1;
if (dx != 0 || dy != 0) {
if (size > threshold) {
for (int ny = miny; ny < maxy; ny += step) {
inputCoordinate.s1 = ny - offsetInput.s1;
float dy = ny - realCoordinate.s1;
for (int nx = minx; nx < maxx; nx += step) {
float dx = nx - realCoordinate.s0;
if (dx != 0 || dy != 0) {
inputCoordinate.s0 = nx - offsetInput.s0;
tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;
if (tempSize > threshold) {
if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) {
float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize};
bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);
@@ -121,8 +121,8 @@ __kernel void defocusKernel(__read_only image2d_t inputImage, __read_only image2
}
}
}
}
}
}
}
}
color = color_accum * (1.0f / multiplier_accum);

View File

@@ -103,16 +103,16 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/*\n" \
" float size = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
" color_accum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate);\n" \
"\n" \
" for (int ny = miny; ny < maxy; ny += step) {\n" \
" for (int nx = minx; nx < maxx; nx += step) {\n" \
" if (nx >= 0 && nx < dimension.s0 && ny >= 0 && ny < dimension.s1) {\n" \
" inputCoordinate.s0 = nx - offsetInput.s0;\n" \
" inputCoordinate.s1 = ny - offsetInput.s1;\n" \
" tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
" if (size > threshold && tempSize > threshold) {\n" \
" float dx = nx - realCoordinate.s0;\n" \
" float dy = ny - realCoordinate.s1;\n" \
" if (dx != 0 || dy != 0) {\n" \
" if (size > threshold) {\n" \
" for (int ny = miny; ny < maxy; ny += step) {\n" \
" inputCoordinate.s1 = ny - offsetInput.s1;\n" \
" float dy = ny - realCoordinate.s1;\n" \
" for (int nx = minx; nx < maxx; nx += step) {\n" \
" float dx = nx - realCoordinate.s0;\n" \
" if (dx != 0 || dy != 0) {\n" \
" inputCoordinate.s0 = nx - offsetInput.s0;\n" \
" tempSize = read_imagef(inputSize, SAMPLER_NEAREST, inputCoordinate).s0;\n" \
" if (tempSize > threshold) {\n" \
" if (tempSize >= fabs(dx) && tempSize >= fabs(dy)) {\n" \
" float2 uv = { 256.0f + dx * 256.0f / tempSize, 256.0f + dy * 256.0f / tempSize};\n" \
" bokeh = read_imagef(bokehImage, SAMPLER_NEAREST, uv);\n" \

View File

@@ -62,8 +62,29 @@ void VariableSizeBokehBlurOperation::initExecution()
QualityStepHelper::initExecution(COM_QH_INCREASE);
}
/**
 * Build the per-tile data for this operation.
 *
 * Asks each of the three inputs for its tile data and stores the results in a
 * newly allocated array of three MemoryBuffer pointers:
 *   [0] image input, [1] bokeh input, [2] size input.
 *
 * \return the array, as an opaque pointer; it is freed with delete[] in
 * deinitializeTileData.
 */
void *VariableSizeBokehBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers)
{
	MemoryBuffer **tileBuffers = new MemoryBuffer *[3];

	/* the inputs are queried in slot order: image, bokeh, size */
	void *imageData = this->m_inputProgram->initializeTileData(rect, memoryBuffers);
	tileBuffers[0] = static_cast<MemoryBuffer *>(imageData);

	void *bokehData = this->m_inputBokehProgram->initializeTileData(rect, memoryBuffers);
	tileBuffers[1] = static_cast<MemoryBuffer *>(bokehData);

	void *sizeData = this->m_inputSizeProgram->initializeTileData(rect, memoryBuffers);
	tileBuffers[2] = static_cast<MemoryBuffer *>(sizeData);

	return tileBuffers;
}
/**
 * Free the pointer array that initializeTileData allocated.
 *
 * Only the array itself is deleted; the MemoryBuffers it points to are left
 * untouched. The rect and memoryBuffers arguments are not used.
 *
 * \param data: the pointer returned by initializeTileData
 */
void VariableSizeBokehBlurOperation::deinitializeTileData(rcti *rect, MemoryBuffer **memoryBuffers, void *data)
{
	delete[] static_cast<MemoryBuffer **>(data);
}
void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, MemoryBuffer *inputBuffers[], void *data)
{
MemoryBuffer** buffers = (MemoryBuffer**)data;
MemoryBuffer* inputProgramBuffer = buffers[0];
MemoryBuffer* inputBokehBuffer = buffers[1];
MemoryBuffer* inputSizeBuffer = buffers[2];
float* inputSizeFloatBuffer = inputSizeBuffer->getBuffer();
float* inputProgramFloatBuffer = inputProgramBuffer->getBuffer();
float readColor[4];
float bokeh[4];
float tempSize[4];
@@ -84,32 +105,37 @@ void VariableSizeBokehBlurOperation::executePixel(float *color, int x, int y, Me
int maxy = MIN2(y + this->m_maxBlur, m_height);
#endif
{
this->m_inputSizeProgram->read(tempSize, x, y, COM_PS_NEAREST, inputBuffers);
this->m_inputProgram->read(readColor, x, y, COM_PS_NEAREST, inputBuffers);
inputSizeBuffer->readNoCheck(tempSize, x, y);
inputProgramBuffer->readNoCheck(readColor, x, y);
add_v4_v4(color_accum, readColor);
add_v4_fl(multiplier_accum, 1.0f);
float sizeCenter = tempSize[0];
for (int ny = miny; ny < maxy; ny += QualityStepHelper::getStep()) {
for (int nx = minx; nx < maxx; nx += QualityStepHelper::getStep()) {
if (nx >= 0 && nx < this->getWidth() && ny >= 0 && ny < getHeight()) {
this->m_inputSizeProgram->read(tempSize, nx, ny, COM_PS_NEAREST, inputBuffers);
float size = tempSize[0];
float fsize = fabsf(size);
if (sizeCenter > this->m_threshold && size > this->m_threshold) {
float dx = nx - x;
float dy = ny - y;
if (nx == x && ny == y) {
}
else if (fsize > fabsf(dx) && fsize > fabsf(dy)) {
float u = (256 + (dx/size) * 256);
float v = (256 + (dy/size) * 256);
this->m_inputBokehProgram->read(bokeh, u, v, COM_PS_NEAREST, inputBuffers);
this->m_inputProgram->read(readColor, nx, ny, COM_PS_NEAREST, inputBuffers);
madd_v4_v4v4(color_accum, bokeh, readColor);
add_v4_v4(multiplier_accum, bokeh);
const int addXStep = QualityStepHelper::getStep()*COM_NUMBER_OF_CHANNELS;
if (sizeCenter > this->m_threshold) {
for (int ny = miny; ny < maxy; ny += QualityStepHelper::getStep()) {
float dy = ny - y;
int offsetNy = ny * inputSizeBuffer->getWidth() * COM_NUMBER_OF_CHANNELS;
int offsetNxNy = offsetNy + (minx*COM_NUMBER_OF_CHANNELS);
for (int nx = minx; nx < maxx; nx += QualityStepHelper::getStep()) {
if (nx != x || ny != y)
{
float size = inputSizeFloatBuffer[offsetNxNy];
if (size > this->m_threshold) {
float fsize = fabsf(size);
float dx = nx - x;
if (fsize > fabsf(dx) && fsize > fabsf(dy)) {
float u = (256.0f + (dx/size) * 256.0f);
float v = (256.0f + (dy/size) * 256.0f);
inputBokehBuffer->readNoCheck(bokeh, u, v);
madd_v4_v4v4(color_accum, bokeh, &inputProgramFloatBuffer[offsetNxNy]);
add_v4_v4(multiplier_accum, bokeh);
}
}
}
offsetNxNy += addXStep;
}
}
}

View File

@@ -50,6 +50,10 @@ public:
*/
void initExecution();
/**
 * Collect the tile data of the image, bokeh and size inputs for rect.
 * Returns a newly allocated array of three MemoryBuffer pointers
 * (image, bokeh, size) as an opaque pointer.
 */
void *initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers);
/**
 * Free the pointer array returned by initializeTileData.
 * The buffers the array points to are not deleted here.
 */
void deinitializeTileData(rcti *rect, MemoryBuffer **memoryBuffers, void *data);
/**
* Deinitialize the execution
*/