Compositor:

Added OpenCL kernel for the directional blur.

This operation always uses the full input image. In the current
implementation this input image is not cached on the device.

A future enhancement could be to cache it on the available OpenCL devices.
This commit is contained in:
Jeroen Bakker
2012-07-11 19:32:32 +00:00
parent 5aa2670d4a
commit b63b8ea69d
5 changed files with 167 additions and 5 deletions

View File

@@ -37,6 +37,7 @@ void DirectionalBlurNode::convertToOperations(ExecutionSystem *graph, Compositor
DirectionalBlurOperation *operation = new DirectionalBlurOperation();
operation->setQuality(context->getQuality());
operation->setData(data);
operation->setbNode(this->getbNode());
this->getInputSocket(0)->relinkConnections(operation->getInputSocket(0), 0, graph);
this->getOutputSocket(0)->relinkConnections(operation->getOutputSocket());
graph->addOperation(operation);

View File

@@ -22,7 +22,7 @@
#include "COM_DirectionalBlurOperation.h"
#include "BLI_math.h"
#include "COM_OpenCLDevice.h"
extern "C" {
#include "RE_pipeline.h"
}
@@ -33,6 +33,7 @@ DirectionalBlurOperation::DirectionalBlurOperation() : NodeOperation()
this->addOutputSocket(COM_DT_COLOR);
this->setComplex(true);
this->setOpenCL(true);
this->m_inputProgram = NULL;
}
@@ -97,9 +98,35 @@ void DirectionalBlurOperation::executePixel(float *color, int x, int y, MemoryBu
lsc += this->m_sc;
}
mul_v4_v4fl(color, col2, 1.0f / iterations);
mul_v4_v4fl(color, col2, 1.0f / (iterations+1));
}
/**
 * Configure and enqueue the "directionalBlurKernel" OpenCL kernel for this
 * operation.
 *
 * Kernel arguments 0-2 (input buffer, output buffer, output offset) are bound
 * through the OpenCLDevice helpers; arguments 3-7 carry the blur parameters
 * copied from this operation's members.
 *
 * NOTE(review): clKernelsToCleanUp is not used here and NULL is handed to
 * COM_clCreateKernel -- confirm that the kernel is released elsewhere.
 */
void DirectionalBlurOperation::executeOpenCL(OpenCLDevice* device,
                                             MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
                                             MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
                                             list<cl_kernel> *clKernelsToCleanUp)
{
	cl_kernel kernel = device->COM_clCreateKernel("directionalBlurKernel", NULL);

	/* Host-side copies of the per-step blur parameters. */
	cl_int stepCount = pow(2.0f, this->m_data->iter);
	cl_float scaleStep = this->m_sc;
	cl_float rotationStep = this->m_rot;
	cl_float2 translateStep = {this->m_tx, this->m_ty};
	cl_float2 centerPixel = {this->m_center_x_pix, this->m_center_y_pix};

	/* Arguments 0..2: image buffers and the output offset. */
	device->COM_clAttachMemoryBufferToKernelParameter(kernel, 0, -1, clMemToCleanUp, inputMemoryBuffers, this->m_inputProgram);
	device->COM_clAttachOutputMemoryBufferToKernelParameter(kernel, 1, clOutputBuffer);
	device->COM_clAttachMemoryBufferOffsetToKernelParameter(kernel, 2, outputMemoryBuffer);

	/* Arguments 3..7: iterations, scale, rotation, translate, center. */
	clSetKernelArg(kernel, 3, sizeof(cl_int), &stepCount);
	clSetKernelArg(kernel, 4, sizeof(cl_float), &scaleStep);
	clSetKernelArg(kernel, 5, sizeof(cl_float), &rotationStep);
	clSetKernelArg(kernel, 6, sizeof(cl_float2), &translateStep);
	clSetKernelArg(kernel, 7, sizeof(cl_float2), &centerPixel);

	/* 8 is presumably the index of the kernel's trailing `offset` argument,
	 * filled in by COM_clEnqueueRange -- TODO confirm against OpenCLDevice. */
	device->COM_clEnqueueRange(kernel, outputMemoryBuffer, 8, this);
}
void DirectionalBlurOperation::deinitExecution()
{
this->m_inputProgram = NULL;

View File

@@ -55,5 +55,11 @@ public:
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
/** Remember the given blur settings; only the pointer is stored, nothing is copied. */
void setData(NodeDBlurData *data)
{
	this->m_data = data;
}
/**
 * OpenCL execution entry point of the directional blur operation.
 * The definition creates the "directionalBlurKernel" kernel on `device`,
 * binds the input/output buffers and blur parameters, and enqueues it.
 *
 * NOTE(review): the definition does not use clKernelsToCleanUp -- confirm
 * whether the created kernel is expected to be registered in that list.
 */
void executeOpenCL(OpenCLDevice* device,
MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
list<cl_kernel> *clKernelsToCleanUp);
};
#endif

View File

@@ -1,7 +1,30 @@
/*
* Copyright 2011, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Contributor:
* Jeroen Bakker
* Monique Dewanchand
*/
/// This file contains all opencl kernels for node-operation implementations
// Global SAMPLERS
const sampler_t SAMPLER_NEAREST = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
const sampler_t SAMPLER_NEAREST = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
const sampler_t SAMPLER_NEAREST_CLAMP = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
__constant const int2 zero = {0,0};
@@ -168,3 +191,44 @@ __kernel void erodeKernel(__read_only image2d_t inputImage, __write_only image2
float4 color = {value,0.0f,0.0f,0.0f};
write_imagef(output, coords, color);
}
// KERNEL --- DIRECTIONAL BLUR ---
/*
 * Each work-item averages one unmodified sample of the input with
 * `iterations` samples taken at progressively scaled, rotated and translated
 * positions around `center`.
 *
 * The output pixel is written at (global id + offset); the input is read at
 * that position shifted by offsetOutput.
 */
__kernel void directionalBlurKernel(__read_only image2d_t inputImage, __write_only image2d_t output,
                                    int2 offsetOutput, int iterations, float scale, float rotation, float2 translate,
                                    float2 center, int2 offset)
{
	int2 coords = {get_global_id(0), get_global_id(1)};
	coords += offset;
	const int2 inputCoords = coords + offsetOutput;

	/* running transformation, advanced by one step per sample */
	float2 stepTranslate = translate;
	float stepScale = scale;
	float stepRotation = rotation;

	/* start the sum with the untransformed pixel */
	float4 sum = read_imagef(inputImage, SAMPLER_NEAREST, inputCoords);

	/* blur the image */
	for (int remaining = iterations; remaining > 0; --remaining) {
		const float cs = cos(stepRotation);
		const float ss = sin(stepRotation);
		const float isc = 1.0f / (1.0f + stepScale);

		const float v = isc * (inputCoords.y - center.y) + stepTranslate.y;
		const float u = isc * (inputCoords.x - center.x) + stepTranslate.x;
		float2 uv = {
			cs * u + ss * v + center.x,
			cs * v - ss * u + center.y
		};

		sum += read_imagef(inputImage, SAMPLER_NEAREST_CLAMP, uv);

		/* double transformations */
		stepTranslate += translate;
		stepRotation += rotation;
		stepScale += scale;
	}

	/* iterations transformed samples plus the initial one */
	sum *= (1.0f / (iterations + 1));

	write_imagef(output, coords, sum);
}

View File

@@ -1,9 +1,32 @@
/* clkernelstoh output of file <COM_OpenCLKernels_cl> */
const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all opencl kernels for node-operation implementations\n" \
const char * clkernelstoh_COM_OpenCLKernels_cl = "/*\n" \
" * Copyright 2011, Blender Foundation.\n" \
" *\n" \
" * This program is free software; you can redistribute it and/or\n" \
" * modify it under the terms of the GNU General Public License\n" \
" * as published by the Free Software Foundation; either version 2\n" \
" * of the License, or (at your option) any later version.\n" \
" *\n" \
" * This program is distributed in the hope that it will be useful,\n" \
" * but WITHOUT ANY WARRANTY; without even the implied warranty of\n" \
" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" \
" * GNU General Public License for more details.\n" \
" *\n" \
" * You should have received a copy of the GNU General Public License\n" \
" * along with this program; if not, write to the Free Software Foundation,\n" \
" * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\n" \
" *\n" \
" * Contributor:\n" \
" * Jeroen Bakker\n" \
" * Monique Dewanchand\n" \
" */\n" \
"\n" \
"/// This file contains all opencl kernels for node-operation implementations\n" \
"\n" \
"// Global SAMPLERS\n" \
"const sampler_t SAMPLER_NEAREST = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n" \
"const sampler_t SAMPLER_NEAREST = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n" \
"const sampler_t SAMPLER_NEAREST_CLAMP = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n" \
"\n" \
"__constant const int2 zero = {0,0};\n" \
"\n" \
@@ -170,4 +193,45 @@ const char * clkernelstoh_COM_OpenCLKernels_cl = "/// This file contains all ope
" float4 color = {value,0.0f,0.0f,0.0f};\n" \
" write_imagef(output, coords, color);\n" \
"}\n" \
"\n" \
"// KERNEL --- DIRECTIONAL BLUR ---\n" \
"__kernel void directionalBlurKernel(__read_only image2d_t inputImage, __write_only image2d_t output,\n" \
" int2 offsetOutput, int iterations, float scale, float rotation, float2 translate,\n" \
" float2 center, int2 offset)\n" \
"{\n" \
" int2 coords = {get_global_id(0), get_global_id(1)};\n" \
" coords += offset;\n" \
" const int2 realCoordinate = coords + offsetOutput;\n" \
"\n" \
" float4 col;\n" \
" float2 ltxy = translate;\n" \
" float lsc = scale;\n" \
" float lrot = rotation;\n" \
"\n" \
" col = read_imagef(inputImage, SAMPLER_NEAREST, realCoordinate);\n" \
"\n" \
" /* blur the image */\n" \
" for (int i = 0; i < iterations; ++i) {\n" \
" const float cs = cos(lrot), ss = sin(lrot);\n" \
" const float isc = 1.0f / (1.0f + lsc);\n" \
"\n" \
" const float v = isc * (realCoordinate.s1 - center.s1) + ltxy.s1;\n" \
" const float u = isc * (realCoordinate.s0 - center.s0) + ltxy.s0;\n" \
" float2 uv = {\n" \
" cs * u + ss * v + center.s0,\n" \
" cs * v - ss * u + center.s1\n" \
" };\n" \
"\n" \
" col += read_imagef(inputImage, SAMPLER_NEAREST_CLAMP, uv);\n" \
"\n" \
" /* double transformations */\n" \
" ltxy += translate;\n" \
" lrot += rotation;\n" \
" lsc += scale;\n" \
" }\n" \
"\n" \
" col *= (1.0f/(iterations+1));\n" \
"\n" \
" write_imagef(output, coords, col);\n" \
"}\n" \
"\0";