Compositor: speedup Kuwahara classic variation
Compute luminance only once per region and per pixel by means of vectorization. Measured time improvements on intel i9 CPU on a 1920 x 3199 image: - from 14.7s to 3.9s for full-frame compositor - from 50.1s to 15.3s for tiled compositor Pull Request: https://projects.blender.org/blender/blender/pulls/108859
This commit is contained in:
committed by
Habib Gahbiche
parent
7d935f94f3
commit
ab4286f940
@@ -4,6 +4,7 @@
|
||||
|
||||
#include "COM_KuwaharaClassicOperation.h"
|
||||
|
||||
#include "BLI_math_vector_types.hh"
|
||||
#include "IMB_colormanagement.h"
|
||||
|
||||
namespace blender::compositor {
|
||||
@@ -32,9 +33,106 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
|
||||
float y,
|
||||
PixelSampler sampler)
|
||||
{
|
||||
for (int ch = 0; ch < 3; ch++) {
|
||||
Vector<float3> mean(4, float3(0.0f));
|
||||
float sum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
float var[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
int cnt[4] = {0, 0, 0, 0};
|
||||
|
||||
/* Split surroundings of pixel into 4 overlapping regions. */
|
||||
for (int dy = -kernel_size_; dy <= kernel_size_; dy++) {
|
||||
for (int dx = -kernel_size_; dx <= kernel_size_; dx++) {
|
||||
|
||||
int xx = x + dx;
|
||||
int yy = y + dy;
|
||||
if (xx >= 0 && yy >= 0 && xx < this->get_width() && yy < this->get_height()) {
|
||||
|
||||
float4 color;
|
||||
image_reader_->read_sampled(color, xx, yy, sampler);
|
||||
const float3 v = color.xyz();
|
||||
|
||||
const float lum = IMB_colormanagement_get_luminance(color);
|
||||
|
||||
if (dx <= 0 && dy <= 0) {
|
||||
mean[0] += v;
|
||||
sum[0] += lum;
|
||||
var[0] += lum * lum;
|
||||
cnt[0]++;
|
||||
}
|
||||
|
||||
if (dx >= 0 && dy <= 0) {
|
||||
mean[1] += v;
|
||||
sum[1] += lum;
|
||||
var[1] += lum * lum;
|
||||
cnt[1]++;
|
||||
}
|
||||
|
||||
if (dx <= 0 && dy >= 0) {
|
||||
mean[2] += v;
|
||||
sum[2] += lum;
|
||||
var[2] += lum * lum;
|
||||
cnt[2]++;
|
||||
}
|
||||
|
||||
if (dx >= 0 && dy >= 0) {
|
||||
mean[3] += v;
|
||||
sum[3] += lum;
|
||||
var[3] += lum * lum;
|
||||
cnt[3]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute region variances. */
|
||||
for (int i = 0; i < 4; i++) {
|
||||
mean[i] = cnt[i] != 0 ? mean[i] / cnt[i] : float3{0.0f, 0.0f, 0.0f};
|
||||
sum[i] = cnt[i] != 0 ? sum[i] / cnt[i] : 0.0f;
|
||||
var[i] = cnt[i] != 0 ? var[i] / cnt[i] : 0.0f;
|
||||
const float temp = sum[i] * sum[i];
|
||||
var[i] = var[i] > temp ? sqrt(var[i] - temp) : 0.0f;
|
||||
}
|
||||
|
||||
/* Choose the region with lowest variance. */
|
||||
float min_var = FLT_MAX;
|
||||
int min_index = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (var[i] < min_var) {
|
||||
min_var = var[i];
|
||||
min_index = i;
|
||||
}
|
||||
}
|
||||
output[0] = mean[min_index].x;
|
||||
output[1] = mean[min_index].y;
|
||||
output[2] = mean[min_index].z;
|
||||
|
||||
/* No changes for alpha channel. */
|
||||
float tmp[4];
|
||||
image_reader_->read_sampled(tmp, x, y, sampler);
|
||||
output[3] = tmp[3];
|
||||
}
|
||||
|
||||
void KuwaharaClassicOperation::set_kernel_size(int kernel_size)
|
||||
{
|
||||
kernel_size_ = kernel_size;
|
||||
}
|
||||
|
||||
int KuwaharaClassicOperation::get_kernel_size()
|
||||
{
|
||||
return kernel_size_;
|
||||
}
|
||||
|
||||
void KuwaharaClassicOperation::update_memory_buffer_partial(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs)
|
||||
{
|
||||
MemoryBuffer *image = inputs[0];
|
||||
|
||||
for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
|
||||
const int x = it.x;
|
||||
const int y = it.y;
|
||||
|
||||
Vector<float3> mean(4, float3(0.0f));
|
||||
float sum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
float mean[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
float var[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
int cnt[4] = {0, 0, 0, 0};
|
||||
|
||||
@@ -44,10 +142,12 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
|
||||
|
||||
int xx = x + dx;
|
||||
int yy = y + dy;
|
||||
if (xx >= 0 && yy >= 0 && xx < this->get_width() && yy < this->get_height()) {
|
||||
float color[4];
|
||||
image_reader_->read_sampled(color, xx, yy, sampler);
|
||||
const float v = color[ch];
|
||||
if (xx >= 0 && yy >= 0 && xx < image->get_width() && yy < image->get_height()) {
|
||||
|
||||
float4 color;
|
||||
image->read_elem(xx, yy, &color.x);
|
||||
const float3 v = color.xyz();
|
||||
|
||||
const float lum = IMB_colormanagement_get_luminance(color);
|
||||
|
||||
if (dx <= 0 && dy <= 0) {
|
||||
@@ -83,7 +183,7 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
|
||||
|
||||
/* Compute region variances. */
|
||||
for (int i = 0; i < 4; i++) {
|
||||
mean[i] = cnt[i] != 0 ? mean[i] / cnt[i] : 0.0f;
|
||||
mean[i] = cnt[i] != 0 ? mean[i] / cnt[i] : float3{0.0f, 0.0f, 0.0f};
|
||||
sum[i] = cnt[i] != 0 ? sum[i] / cnt[i] : 0.0f;
|
||||
var[i] = cnt[i] != 0 ? var[i] / cnt[i] : 0.0f;
|
||||
const float temp = sum[i] * sum[i];
|
||||
@@ -99,105 +199,12 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
|
||||
min_index = i;
|
||||
}
|
||||
}
|
||||
output[ch] = mean[min_index];
|
||||
}
|
||||
it.out[0] = mean[min_index].x;
|
||||
it.out[1] = mean[min_index].y;
|
||||
it.out[2] = mean[min_index].z;
|
||||
|
||||
/* No changes for alpha channel. */
|
||||
float tmp[4];
|
||||
image_reader_->read_sampled(tmp, x, y, sampler);
|
||||
output[3] = tmp[3];
|
||||
}
|
||||
|
||||
void KuwaharaClassicOperation::set_kernel_size(int kernel_size)
|
||||
{
|
||||
kernel_size_ = kernel_size;
|
||||
}
|
||||
|
||||
int KuwaharaClassicOperation::get_kernel_size()
|
||||
{
|
||||
return kernel_size_;
|
||||
}
|
||||
|
||||
void KuwaharaClassicOperation::update_memory_buffer_partial(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs)
|
||||
{
|
||||
MemoryBuffer *image = inputs[0];
|
||||
|
||||
for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
|
||||
const int x = it.x;
|
||||
const int y = it.y;
|
||||
/* No changes for alpha channel. */
|
||||
it.out[3] = image->get_value(x, y, 3);
|
||||
|
||||
for (int ch = 0; ch < 3; ch++) {
|
||||
float sum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
float mean[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
float var[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
int cnt[4] = {0, 0, 0, 0};
|
||||
|
||||
/* Split surroundings of pixel into 4 overlapping regions. */
|
||||
for (int dy = -kernel_size_; dy <= kernel_size_; dy++) {
|
||||
for (int dx = -kernel_size_; dx <= kernel_size_; dx++) {
|
||||
|
||||
int xx = x + dx;
|
||||
int yy = y + dy;
|
||||
if (xx >= 0 && yy >= 0 && xx < image->get_width() && yy < image->get_height()) {
|
||||
const float v = image->get_value(xx, yy, ch);
|
||||
float color[4];
|
||||
image->read_elem(xx, yy, color);
|
||||
const float lum = IMB_colormanagement_get_luminance(color);
|
||||
|
||||
if (dx <= 0 && dy <= 0) {
|
||||
mean[0] += v;
|
||||
sum[0] += lum;
|
||||
var[0] += lum * lum;
|
||||
cnt[0]++;
|
||||
}
|
||||
|
||||
if (dx >= 0 && dy <= 0) {
|
||||
mean[1] += v;
|
||||
sum[1] += lum;
|
||||
var[1] += lum * lum;
|
||||
cnt[1]++;
|
||||
}
|
||||
|
||||
if (dx <= 0 && dy >= 0) {
|
||||
mean[2] += v;
|
||||
sum[2] += lum;
|
||||
var[2] += lum * lum;
|
||||
cnt[2]++;
|
||||
}
|
||||
|
||||
if (dx >= 0 && dy >= 0) {
|
||||
mean[3] += v;
|
||||
sum[3] += lum;
|
||||
var[3] += lum * lum;
|
||||
cnt[3]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute region variances. */
|
||||
for (int i = 0; i < 4; i++) {
|
||||
mean[i] = cnt[i] != 0 ? mean[i] / cnt[i] : 0.0f;
|
||||
sum[i] = cnt[i] != 0 ? sum[i] / cnt[i] : 0.0f;
|
||||
var[i] = cnt[i] != 0 ? var[i] / cnt[i] : 0.0f;
|
||||
const float temp = sum[i] * sum[i];
|
||||
var[i] = var[i] > temp ? sqrt(var[i] - temp) : 0.0f;
|
||||
}
|
||||
|
||||
/* Choose the region with lowest variance. */
|
||||
float min_var = FLT_MAX;
|
||||
int min_index = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (var[i] < min_var) {
|
||||
min_var = var[i];
|
||||
min_index = i;
|
||||
}
|
||||
}
|
||||
output->get_value(x, y, ch) = mean[min_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user