VSE: reduce effects code duplication, making gaussian blur faster in the process
Now that the code is in C++, quite some duplication between "byte" and "float" effect code paths can be reduced (easier than it was in C times). So I did that, removing about 400 lines of code. In that process I accidentally made Gaussian Blur faster, since while reducing the amount of code I noticed it was doing some things sub-optimally (calculated kernel tables for each job, etc.). Applying 100x100 gaussian blur on 4K UHD resolution image strip on Ryzen 5950X went 630ms -> 450ms. Pull Request: https://projects.blender.org/blender/blender/pulls/116089
This commit is contained in:
committed by
Aras Pranckevicius
parent
4a34dcbb69
commit
5cac8e2bb4
@@ -64,6 +64,8 @@
|
||||
#include "strip_time.hh"
|
||||
#include "utils.hh"
|
||||
|
||||
using blender::float4;
|
||||
|
||||
static SeqEffectHandle get_sequence_effect_impl(int seq_type);
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
@@ -120,6 +122,44 @@ static void slice_get_float_buffers(const SeqRenderData *context,
|
||||
}
|
||||
}
|
||||
|
||||
static float4 load_premul_pixel(const uchar *ptr)
|
||||
{
|
||||
float4 res;
|
||||
straight_uchar_to_premul_float(res, ptr);
|
||||
return res;
|
||||
}
|
||||
|
||||
static float4 load_premul_pixel(const float *ptr)
|
||||
{
|
||||
return float4(ptr);
|
||||
}
|
||||
|
||||
static void store_premul_pixel(const float4 &pix, uchar *dst)
|
||||
{
|
||||
premul_float_to_straight_uchar(dst, pix);
|
||||
}
|
||||
|
||||
static void store_premul_pixel(const float4 &pix, float *dst)
|
||||
{
|
||||
*reinterpret_cast<float4 *>(dst) = pix;
|
||||
}
|
||||
|
||||
static void store_opaque_black_pixel(uchar *dst)
|
||||
{
|
||||
dst[0] = 0;
|
||||
dst[1] = 0;
|
||||
dst[2] = 0;
|
||||
dst[3] = 255;
|
||||
}
|
||||
|
||||
static void store_opaque_black_pixel(float *dst)
|
||||
{
|
||||
dst[0] = 0.0f;
|
||||
dst[1] = 0.0f;
|
||||
dst[2] = 0.0f;
|
||||
dst[3] = 1.0f;
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
@@ -203,72 +243,32 @@ static void init_alpha_over_or_under(Sequence *seq)
|
||||
seq->seq1 = seq2;
|
||||
}
|
||||
|
||||
static void do_alphaover_effect_byte(
|
||||
float fac, int x, int y, uchar *rect1, uchar *rect2, uchar *out)
|
||||
/* dst = src1 over src2 (alpha from src1) */
|
||||
template<typename T>
|
||||
static void do_alphaover_effect(
|
||||
float fac, int width, int height, const T *src1, const T *src2, T *dst)
|
||||
{
|
||||
uchar *cp1 = rect1;
|
||||
uchar *cp2 = rect2;
|
||||
uchar *rt = out;
|
||||
|
||||
for (int i = 0; i < y; i++) {
|
||||
for (int j = 0; j < x; j++) {
|
||||
/* rt = rt1 over rt2 (alpha from rt1) */
|
||||
|
||||
float tempc[4], rt1[4], rt2[4];
|
||||
straight_uchar_to_premul_float(rt1, cp1);
|
||||
straight_uchar_to_premul_float(rt2, cp2);
|
||||
|
||||
float mfac = 1.0f - fac * rt1[3];
|
||||
|
||||
if (fac <= 0.0f) {
|
||||
*((uint *)rt) = *((uint *)cp2);
|
||||
}
|
||||
else if (mfac <= 0.0f) {
|
||||
*((uint *)rt) = *((uint *)cp1);
|
||||
}
|
||||
else {
|
||||
tempc[0] = fac * rt1[0] + mfac * rt2[0];
|
||||
tempc[1] = fac * rt1[1] + mfac * rt2[1];
|
||||
tempc[2] = fac * rt1[2] + mfac * rt2[2];
|
||||
tempc[3] = fac * rt1[3] + mfac * rt2[3];
|
||||
|
||||
premul_float_to_straight_uchar(rt, tempc);
|
||||
}
|
||||
cp1 += 4;
|
||||
cp2 += 4;
|
||||
rt += 4;
|
||||
}
|
||||
if (fac <= 0.0f) {
|
||||
memcpy(dst, src2, sizeof(T) * 4 * width * height);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void do_alphaover_effect_float(
|
||||
float fac, int x, int y, float *rect1, float *rect2, float *out)
|
||||
{
|
||||
float *rt1 = rect1;
|
||||
float *rt2 = rect2;
|
||||
float *rt = out;
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
float4 col1 = load_premul_pixel(src1);
|
||||
float mfac = 1.0f - fac * col1.w;
|
||||
|
||||
for (int i = 0; i < y; i++) {
|
||||
for (int j = 0; j < x; j++) {
|
||||
/* rt = rt1 over rt2 (alpha from rt1) */
|
||||
|
||||
float mfac = 1.0f - (fac * rt1[3]);
|
||||
|
||||
if (fac <= 0.0f) {
|
||||
memcpy(rt, rt2, sizeof(float[4]));
|
||||
}
|
||||
else if (mfac <= 0) {
|
||||
memcpy(rt, rt1, sizeof(float[4]));
|
||||
if (mfac <= 0.0f) {
|
||||
memcpy(dst, src1, sizeof(T) * 4);
|
||||
}
|
||||
else {
|
||||
rt[0] = fac * rt1[0] + mfac * rt2[0];
|
||||
rt[1] = fac * rt1[1] + mfac * rt2[1];
|
||||
rt[2] = fac * rt1[2] + mfac * rt2[2];
|
||||
rt[3] = fac * rt1[3] + mfac * rt2[3];
|
||||
float4 col2 = load_premul_pixel(src2);
|
||||
float4 col = fac * col1 + mfac * col2;
|
||||
store_premul_pixel(col, dst);
|
||||
}
|
||||
rt1 += 4;
|
||||
rt2 += 4;
|
||||
rt += 4;
|
||||
src1 += 4;
|
||||
src2 += 4;
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -290,7 +290,7 @@ static void do_alphaover_effect(const SeqRenderData *context,
|
||||
slice_get_float_buffers(
|
||||
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_alphaover_effect_float(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
do_alphaover_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
}
|
||||
else {
|
||||
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
|
||||
@@ -298,7 +298,7 @@ static void do_alphaover_effect(const SeqRenderData *context,
|
||||
slice_get_byte_buffers(
|
||||
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_alphaover_effect_byte(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
do_alphaover_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -308,88 +308,34 @@ static void do_alphaover_effect(const SeqRenderData *context,
|
||||
/** \name Alpha Under Effect
|
||||
* \{ */
|
||||
|
||||
static void do_alphaunder_effect_byte(
|
||||
float fac, int x, int y, uchar *rect1, uchar *rect2, uchar *out)
|
||||
/* dst = src1 under src2 (alpha from src2) */
|
||||
template<typename T>
|
||||
static void do_alphaunder_effect(
|
||||
float fac, int width, int height, const T *src1, const T *src2, T *dst)
|
||||
{
|
||||
uchar *cp1 = rect1;
|
||||
uchar *cp2 = rect2;
|
||||
uchar *rt = out;
|
||||
|
||||
for (int i = 0; i < y; i++) {
|
||||
for (int j = 0; j < x; j++) {
|
||||
/* rt = rt1 under rt2 (alpha from rt2) */
|
||||
|
||||
float tempc[4], rt1[4], rt2[4];
|
||||
straight_uchar_to_premul_float(rt1, cp1);
|
||||
straight_uchar_to_premul_float(rt2, cp2);
|
||||
|
||||
/* this complex optimization is because the
|
||||
* 'skybuf' can be crossed in
|
||||
*/
|
||||
if (rt2[3] <= 0.0f && fac >= 1.0f) {
|
||||
*((uint *)rt) = *((uint *)cp1);
|
||||
}
|
||||
else if (rt2[3] >= 1.0f) {
|
||||
*((uint *)rt) = *((uint *)cp2);
|
||||
}
|
||||
else {
|
||||
float temp_fac = (fac * (1.0f - rt2[3]));
|
||||
|
||||
if (fac <= 0) {
|
||||
*((uint *)rt) = *((uint *)cp2);
|
||||
}
|
||||
else {
|
||||
tempc[0] = (temp_fac * rt1[0] + rt2[0]);
|
||||
tempc[1] = (temp_fac * rt1[1] + rt2[1]);
|
||||
tempc[2] = (temp_fac * rt1[2] + rt2[2]);
|
||||
tempc[3] = (temp_fac * rt1[3] + rt2[3]);
|
||||
|
||||
premul_float_to_straight_uchar(rt, tempc);
|
||||
}
|
||||
}
|
||||
cp1 += 4;
|
||||
cp2 += 4;
|
||||
rt += 4;
|
||||
}
|
||||
if (fac >= 1.0f) {
|
||||
memcpy(dst, src1, sizeof(T) * 4 * width * height);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void do_alphaunder_effect_float(
|
||||
float fac, int x, int y, float *rect1, float *rect2, float *out)
|
||||
{
|
||||
float *rt1 = rect1;
|
||||
float *rt2 = rect2;
|
||||
float *rt = out;
|
||||
|
||||
for (int i = 0; i < y; i++) {
|
||||
for (int j = 0; j < x; j++) {
|
||||
/* rt = rt1 under rt2 (alpha from rt2) */
|
||||
|
||||
/* this complex optimization is because the
|
||||
* 'skybuf' can be crossed in
|
||||
*/
|
||||
if (rt2[3] <= 0 && fac >= 1.0f) {
|
||||
memcpy(rt, rt1, sizeof(float[4]));
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
float4 col2 = load_premul_pixel(src2);
|
||||
if (col2.w <= 0.0f) {
|
||||
memcpy(dst, src1, sizeof(T) * 4);
|
||||
}
|
||||
else if (rt2[3] >= 1.0f) {
|
||||
memcpy(rt, rt2, sizeof(float[4]));
|
||||
else if (col2.w >= 1.0f || fac <= 0.0f) {
|
||||
memcpy(dst, src2, sizeof(T) * 4);
|
||||
}
|
||||
else {
|
||||
float temp_fac = fac * (1.0f - rt2[3]);
|
||||
|
||||
if (fac == 0) {
|
||||
memcpy(rt, rt2, sizeof(float[4]));
|
||||
}
|
||||
else {
|
||||
rt[0] = temp_fac * rt1[0] + rt2[0];
|
||||
rt[1] = temp_fac * rt1[1] + rt2[1];
|
||||
rt[2] = temp_fac * rt1[2] + rt2[2];
|
||||
rt[3] = temp_fac * rt1[3] + rt2[3];
|
||||
}
|
||||
float mfac = fac * (1.0f - col2.w);
|
||||
float4 col1 = load_premul_pixel(src1);
|
||||
float4 col = mfac * col1 + col2;
|
||||
store_premul_pixel(col, dst);
|
||||
}
|
||||
rt1 += 4;
|
||||
rt2 += 4;
|
||||
rt += 4;
|
||||
src1 += 4;
|
||||
src2 += 4;
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -411,7 +357,7 @@ static void do_alphaunder_effect(const SeqRenderData *context,
|
||||
slice_get_float_buffers(
|
||||
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_alphaunder_effect_float(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
do_alphaunder_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
}
|
||||
else {
|
||||
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
|
||||
@@ -419,7 +365,7 @@ static void do_alphaunder_effect(const SeqRenderData *context,
|
||||
slice_get_byte_buffers(
|
||||
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_alphaunder_effect_byte(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
do_alphaunder_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -527,53 +473,24 @@ static float invGammaCorrect(float c)
|
||||
return sqrtf_signed(c);
|
||||
}
|
||||
|
||||
static void do_gammacross_effect_byte(
|
||||
float fac, int x, int y, uchar *rect1, uchar *rect2, uchar *out)
|
||||
template<typename T>
|
||||
static void do_gammacross_effect(
|
||||
float fac, int width, int height, const T *src1, const T *src2, T *dst)
|
||||
{
|
||||
uchar *cp1 = rect1;
|
||||
uchar *cp2 = rect2;
|
||||
uchar *rt = out;
|
||||
|
||||
float mfac = 1.0f - fac;
|
||||
|
||||
for (int i = 0; i < y; i++) {
|
||||
for (int j = 0; j < x; j++) {
|
||||
float rt1[4], rt2[4], tempc[4];
|
||||
|
||||
straight_uchar_to_premul_float(rt1, cp1);
|
||||
straight_uchar_to_premul_float(rt2, cp2);
|
||||
|
||||
tempc[0] = gammaCorrect(mfac * invGammaCorrect(rt1[0]) + fac * invGammaCorrect(rt2[0]));
|
||||
tempc[1] = gammaCorrect(mfac * invGammaCorrect(rt1[1]) + fac * invGammaCorrect(rt2[1]));
|
||||
tempc[2] = gammaCorrect(mfac * invGammaCorrect(rt1[2]) + fac * invGammaCorrect(rt2[2]));
|
||||
tempc[3] = gammaCorrect(mfac * invGammaCorrect(rt1[3]) + fac * invGammaCorrect(rt2[3]));
|
||||
|
||||
premul_float_to_straight_uchar(rt, tempc);
|
||||
cp1 += 4;
|
||||
cp2 += 4;
|
||||
rt += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void do_gammacross_effect_float(
|
||||
float fac, int x, int y, float *rect1, float *rect2, float *out)
|
||||
{
|
||||
float *rt1 = rect1;
|
||||
float *rt2 = rect2;
|
||||
float *rt = out;
|
||||
|
||||
float mfac = 1.0f - fac;
|
||||
|
||||
for (int i = 0; i < y; i++) {
|
||||
for (int j = 0; j < x; j++) {
|
||||
rt[0] = gammaCorrect(mfac * invGammaCorrect(rt1[0]) + fac * invGammaCorrect(rt2[0]));
|
||||
rt[1] = gammaCorrect(mfac * invGammaCorrect(rt1[1]) + fac * invGammaCorrect(rt2[1]));
|
||||
rt[2] = gammaCorrect(mfac * invGammaCorrect(rt1[2]) + fac * invGammaCorrect(rt2[2]));
|
||||
rt[3] = gammaCorrect(mfac * invGammaCorrect(rt1[3]) + fac * invGammaCorrect(rt2[3]));
|
||||
rt1 += 4;
|
||||
rt2 += 4;
|
||||
rt += 4;
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
float4 col1 = load_premul_pixel(src1);
|
||||
float4 col2 = load_premul_pixel(src2);
|
||||
float4 col;
|
||||
for (int c = 0; c < 4; ++c) {
|
||||
col[c] = gammaCorrect(mfac * invGammaCorrect(col1[c]) + fac * invGammaCorrect(col2[c]));
|
||||
}
|
||||
store_premul_pixel(col, dst);
|
||||
src1 += 4;
|
||||
src2 += 4;
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -604,7 +521,7 @@ static void do_gammacross_effect(const SeqRenderData *context,
|
||||
slice_get_float_buffers(
|
||||
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_gammacross_effect_float(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
do_gammacross_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
}
|
||||
else {
|
||||
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
|
||||
@@ -612,7 +529,7 @@ static void do_gammacross_effect(const SeqRenderData *context,
|
||||
slice_get_byte_buffers(
|
||||
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_gammacross_effect_byte(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
do_gammacross_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -943,57 +860,21 @@ static void do_mul_effect(const SeqRenderData *context,
|
||||
/** \name Blend Mode Effect
|
||||
* \{ */
|
||||
|
||||
using IMB_blend_func_byte = void (*)(uchar *dst, const uchar *src1, const uchar *src2);
|
||||
using IMB_blend_func_float = void (*)(float *dst, const float *src1, const float *src2);
|
||||
|
||||
BLI_INLINE void apply_blend_function_byte(float fac,
|
||||
int x,
|
||||
int y,
|
||||
uchar *rect1,
|
||||
uchar *rect2,
|
||||
uchar *out,
|
||||
IMB_blend_func_byte blend_function)
|
||||
/* blend_function has to be: void (T* dst, const T *src1, const T *src2) */
|
||||
template<typename T, typename Func>
|
||||
static void apply_blend_function(
|
||||
float fac, int width, int height, const T *src1, T *src2, T *dst, Func blend_function)
|
||||
{
|
||||
uchar *rt1 = rect1;
|
||||
uchar *rt2 = rect2;
|
||||
uchar *rt = out;
|
||||
|
||||
for (int i = 0; i < y; i++) {
|
||||
for (int j = 0; j < x; j++) {
|
||||
uint achannel = rt2[3];
|
||||
rt2[3] = uint(achannel) * fac;
|
||||
blend_function(rt, rt1, rt2);
|
||||
rt2[3] = achannel;
|
||||
rt[3] = rt1[3];
|
||||
rt1 += 4;
|
||||
rt2 += 4;
|
||||
rt += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BLI_INLINE void apply_blend_function_float(float fac,
|
||||
int x,
|
||||
int y,
|
||||
float *rect1,
|
||||
float *rect2,
|
||||
float *out,
|
||||
IMB_blend_func_float blend_function)
|
||||
{
|
||||
float *rt1 = rect1;
|
||||
float *rt2 = rect2;
|
||||
float *rt = out;
|
||||
|
||||
for (int i = 0; i < y; i++) {
|
||||
for (int j = 0; j < x; j++) {
|
||||
float achannel = rt2[3];
|
||||
rt2[3] = achannel * fac;
|
||||
blend_function(rt, rt1, rt2);
|
||||
rt2[3] = achannel;
|
||||
rt[3] = rt1[3];
|
||||
rt1 += 4;
|
||||
rt2 += 4;
|
||||
rt += 4;
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
T achannel = src2[3];
|
||||
src2[3] = T(achannel * fac);
|
||||
blend_function(dst, src1, src2);
|
||||
src2[3] = achannel;
|
||||
dst[3] = src1[3];
|
||||
src1 += 4;
|
||||
src2 += 4;
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1003,67 +884,67 @@ static void do_blend_effect_float(
|
||||
{
|
||||
switch (btype) {
|
||||
case SEQ_TYPE_ADD:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_add_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_add_float);
|
||||
break;
|
||||
case SEQ_TYPE_SUB:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_sub_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_sub_float);
|
||||
break;
|
||||
case SEQ_TYPE_MUL:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_mul_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_mul_float);
|
||||
break;
|
||||
case SEQ_TYPE_DARKEN:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_darken_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_darken_float);
|
||||
break;
|
||||
case SEQ_TYPE_COLOR_BURN:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_burn_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_burn_float);
|
||||
break;
|
||||
case SEQ_TYPE_LINEAR_BURN:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_linearburn_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearburn_float);
|
||||
break;
|
||||
case SEQ_TYPE_SCREEN:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_screen_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_screen_float);
|
||||
break;
|
||||
case SEQ_TYPE_LIGHTEN:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_lighten_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_lighten_float);
|
||||
break;
|
||||
case SEQ_TYPE_DODGE:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_dodge_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_dodge_float);
|
||||
break;
|
||||
case SEQ_TYPE_OVERLAY:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_overlay_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_overlay_float);
|
||||
break;
|
||||
case SEQ_TYPE_SOFT_LIGHT:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_softlight_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_softlight_float);
|
||||
break;
|
||||
case SEQ_TYPE_HARD_LIGHT:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_hardlight_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hardlight_float);
|
||||
break;
|
||||
case SEQ_TYPE_PIN_LIGHT:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_pinlight_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_pinlight_float);
|
||||
break;
|
||||
case SEQ_TYPE_LIN_LIGHT:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_linearlight_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearlight_float);
|
||||
break;
|
||||
case SEQ_TYPE_VIVID_LIGHT:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_vividlight_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_vividlight_float);
|
||||
break;
|
||||
case SEQ_TYPE_BLEND_COLOR:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_color_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_color_float);
|
||||
break;
|
||||
case SEQ_TYPE_HUE:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_hue_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hue_float);
|
||||
break;
|
||||
case SEQ_TYPE_SATURATION:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_saturation_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_saturation_float);
|
||||
break;
|
||||
case SEQ_TYPE_VALUE:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_luminosity_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_luminosity_float);
|
||||
break;
|
||||
case SEQ_TYPE_DIFFERENCE:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_difference_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_difference_float);
|
||||
break;
|
||||
case SEQ_TYPE_EXCLUSION:
|
||||
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_exclusion_float);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_exclusion_float);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -1075,67 +956,67 @@ static void do_blend_effect_byte(
|
||||
{
|
||||
switch (btype) {
|
||||
case SEQ_TYPE_ADD:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_add_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_add_byte);
|
||||
break;
|
||||
case SEQ_TYPE_SUB:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_sub_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_sub_byte);
|
||||
break;
|
||||
case SEQ_TYPE_MUL:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_mul_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_mul_byte);
|
||||
break;
|
||||
case SEQ_TYPE_DARKEN:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_darken_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_darken_byte);
|
||||
break;
|
||||
case SEQ_TYPE_COLOR_BURN:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_burn_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_burn_byte);
|
||||
break;
|
||||
case SEQ_TYPE_LINEAR_BURN:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_linearburn_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearburn_byte);
|
||||
break;
|
||||
case SEQ_TYPE_SCREEN:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_screen_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_screen_byte);
|
||||
break;
|
||||
case SEQ_TYPE_LIGHTEN:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_lighten_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_lighten_byte);
|
||||
break;
|
||||
case SEQ_TYPE_DODGE:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_dodge_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_dodge_byte);
|
||||
break;
|
||||
case SEQ_TYPE_OVERLAY:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_overlay_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_overlay_byte);
|
||||
break;
|
||||
case SEQ_TYPE_SOFT_LIGHT:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_softlight_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_softlight_byte);
|
||||
break;
|
||||
case SEQ_TYPE_HARD_LIGHT:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_hardlight_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hardlight_byte);
|
||||
break;
|
||||
case SEQ_TYPE_PIN_LIGHT:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_pinlight_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_pinlight_byte);
|
||||
break;
|
||||
case SEQ_TYPE_LIN_LIGHT:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_linearlight_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearlight_byte);
|
||||
break;
|
||||
case SEQ_TYPE_VIVID_LIGHT:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_vividlight_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_vividlight_byte);
|
||||
break;
|
||||
case SEQ_TYPE_BLEND_COLOR:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_color_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_color_byte);
|
||||
break;
|
||||
case SEQ_TYPE_HUE:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_hue_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hue_byte);
|
||||
break;
|
||||
case SEQ_TYPE_SATURATION:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_saturation_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_saturation_byte);
|
||||
break;
|
||||
case SEQ_TYPE_VALUE:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_luminosity_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_luminosity_byte);
|
||||
break;
|
||||
case SEQ_TYPE_DIFFERENCE:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_difference_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_difference_byte);
|
||||
break;
|
||||
case SEQ_TYPE_EXCLUSION:
|
||||
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_exclusion_byte);
|
||||
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_exclusion_byte);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -1516,58 +1397,38 @@ static void copy_wipe_effect(Sequence *dst, Sequence *src, const int /*flag*/)
|
||||
dst->effectdata = MEM_dupallocN(src->effectdata);
|
||||
}
|
||||
|
||||
static void do_wipe_effect_byte(const Sequence *seq,
|
||||
float fac,
|
||||
int width,
|
||||
int height,
|
||||
const uchar *rect1,
|
||||
const uchar *rect2,
|
||||
uchar *out)
|
||||
template<typename T>
|
||||
static void do_wipe_effect(
|
||||
const Sequence *seq, float fac, int width, int height, const T *rect1, const T *rect2, T *out)
|
||||
{
|
||||
using namespace blender;
|
||||
const WipeVars *wipe = (const WipeVars *)seq->effectdata;
|
||||
const WipeZone wipezone = precalc_wipe_zone(wipe, width, height);
|
||||
|
||||
threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) {
|
||||
const uchar *cp1 = rect1 + y_range.first() * width * 4;
|
||||
const uchar *cp2 = rect2 + y_range.first() * width * 4;
|
||||
uchar *rt = out + y_range.first() * width * 4;
|
||||
const T *cp1 = rect1 ? rect1 + y_range.first() * width * 4 : nullptr;
|
||||
const T *cp2 = rect2 ? rect2 + y_range.first() * width * 4 : nullptr;
|
||||
T *rt = out + y_range.first() * width * 4;
|
||||
for (const int y : y_range) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
float check = check_zone(&wipezone, x, y, fac);
|
||||
if (check) {
|
||||
if (cp1) {
|
||||
float rt1[4], rt2[4], tempc[4];
|
||||
|
||||
straight_uchar_to_premul_float(rt1, cp1);
|
||||
straight_uchar_to_premul_float(rt2, cp2);
|
||||
|
||||
tempc[0] = rt1[0] * check + rt2[0] * (1 - check);
|
||||
tempc[1] = rt1[1] * check + rt2[1] * (1 - check);
|
||||
tempc[2] = rt1[2] * check + rt2[2] * (1 - check);
|
||||
tempc[3] = rt1[3] * check + rt2[3] * (1 - check);
|
||||
|
||||
premul_float_to_straight_uchar(rt, tempc);
|
||||
float4 col1 = load_premul_pixel(cp1);
|
||||
float4 col2 = load_premul_pixel(cp2);
|
||||
float4 col = col1 * check + col2 * (1.0f - check);
|
||||
store_premul_pixel(col, rt);
|
||||
}
|
||||
else {
|
||||
rt[0] = 0;
|
||||
rt[1] = 0;
|
||||
rt[2] = 0;
|
||||
rt[3] = 255;
|
||||
store_opaque_black_pixel(rt);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (cp2) {
|
||||
rt[0] = cp2[0];
|
||||
rt[1] = cp2[1];
|
||||
rt[2] = cp2[2];
|
||||
rt[3] = cp2[3];
|
||||
memcpy(rt, cp2, sizeof(T) * 4);
|
||||
}
|
||||
else {
|
||||
rt[0] = 0;
|
||||
rt[1] = 0;
|
||||
rt[2] = 0;
|
||||
rt[3] = 255;
|
||||
store_opaque_black_pixel(rt);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1583,66 +1444,6 @@ static void do_wipe_effect_byte(const Sequence *seq,
|
||||
});
|
||||
}
|
||||
|
||||
static void do_wipe_effect_float(Sequence *seq,
|
||||
float fac,
|
||||
int width,
|
||||
int height,
|
||||
const float *rect1,
|
||||
const float *rect2,
|
||||
float *out)
|
||||
{
|
||||
using namespace blender;
|
||||
const WipeVars *wipe = (const WipeVars *)seq->effectdata;
|
||||
const WipeZone wipezone = precalc_wipe_zone(wipe, width, height);
|
||||
|
||||
threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) {
|
||||
const float *rt1 = rect1 + y_range.first() * width * 4;
|
||||
const float *rt2 = rect2 + y_range.first() * width * 4;
|
||||
float *rt = out + y_range.first() * width * 4;
|
||||
for (const int y : y_range) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
float check = check_zone(&wipezone, x, y, fac);
|
||||
if (check) {
|
||||
if (rt1) {
|
||||
rt[0] = rt1[0] * check + rt2[0] * (1 - check);
|
||||
rt[1] = rt1[1] * check + rt2[1] * (1 - check);
|
||||
rt[2] = rt1[2] * check + rt2[2] * (1 - check);
|
||||
rt[3] = rt1[3] * check + rt2[3] * (1 - check);
|
||||
}
|
||||
else {
|
||||
rt[0] = 0;
|
||||
rt[1] = 0;
|
||||
rt[2] = 0;
|
||||
rt[3] = 1.0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (rt2) {
|
||||
rt[0] = rt2[0];
|
||||
rt[1] = rt2[1];
|
||||
rt[2] = rt2[2];
|
||||
rt[3] = rt2[3];
|
||||
}
|
||||
else {
|
||||
rt[0] = 0;
|
||||
rt[1] = 0;
|
||||
rt[2] = 0;
|
||||
rt[3] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
rt += 4;
|
||||
if (rt1 != nullptr) {
|
||||
rt1 += 4;
|
||||
}
|
||||
if (rt2 != nullptr) {
|
||||
rt2 += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static ImBuf *do_wipe_effect(const SeqRenderData *context,
|
||||
Sequence *seq,
|
||||
float /*timeline_frame*/,
|
||||
@@ -1654,22 +1455,22 @@ static ImBuf *do_wipe_effect(const SeqRenderData *context,
|
||||
ImBuf *out = prepare_effect_imbufs(context, ibuf1, ibuf2, ibuf3);
|
||||
|
||||
if (out->float_buffer.data) {
|
||||
do_wipe_effect_float(seq,
|
||||
fac,
|
||||
context->rectx,
|
||||
context->recty,
|
||||
ibuf1->float_buffer.data,
|
||||
ibuf2->float_buffer.data,
|
||||
out->float_buffer.data);
|
||||
do_wipe_effect(seq,
|
||||
fac,
|
||||
context->rectx,
|
||||
context->recty,
|
||||
ibuf1->float_buffer.data,
|
||||
ibuf2->float_buffer.data,
|
||||
out->float_buffer.data);
|
||||
}
|
||||
else {
|
||||
do_wipe_effect_byte(seq,
|
||||
fac,
|
||||
context->rectx,
|
||||
context->recty,
|
||||
ibuf1->byte_buffer.data,
|
||||
ibuf2->byte_buffer.data,
|
||||
out->byte_buffer.data);
|
||||
do_wipe_effect(seq,
|
||||
fac,
|
||||
context->rectx,
|
||||
context->recty,
|
||||
ibuf1->byte_buffer.data,
|
||||
ibuf2->byte_buffer.data,
|
||||
out->byte_buffer.data);
|
||||
}
|
||||
|
||||
return out;
|
||||
@@ -1837,12 +1638,8 @@ static void do_transform_effect(const SeqRenderData *context,
|
||||
/** \name Glow Effect
|
||||
* \{ */
|
||||
|
||||
static void glow_blur_bitmap(const blender::float4 *src,
|
||||
blender::float4 *map,
|
||||
int width,
|
||||
int height,
|
||||
float blur,
|
||||
int quality)
|
||||
static void glow_blur_bitmap(
|
||||
const float4 *src, float4 *map, int width, int height, float blur, int quality)
|
||||
{
|
||||
using namespace blender;
|
||||
|
||||
@@ -1913,8 +1710,8 @@ static void glow_blur_bitmap(const blender::float4 *src,
|
||||
});
|
||||
}
|
||||
|
||||
static void blur_isolate_highlights(const blender::float4 *in,
|
||||
blender::float4 *out,
|
||||
static void blur_isolate_highlights(const float4 *in,
|
||||
float4 *out,
|
||||
int width,
|
||||
int height,
|
||||
float threshold,
|
||||
@@ -2525,7 +2322,7 @@ static void do_overdrop_effect(const SeqRenderData *context,
|
||||
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_drop_effect_float(fac, x, y, rect1, rect2, rect_out);
|
||||
do_alphaover_effect_float(fac, x, y, rect1, rect2, rect_out);
|
||||
do_alphaover_effect(fac, x, y, rect1, rect2, rect_out);
|
||||
}
|
||||
else {
|
||||
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
|
||||
@@ -2534,7 +2331,7 @@ static void do_overdrop_effect(const SeqRenderData *context,
|
||||
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_drop_effect_byte(fac, x, y, rect1, rect2, rect_out);
|
||||
do_alphaover_effect_byte(fac, x, y, rect1, rect2, rect_out);
|
||||
do_alphaover_effect(fac, x, y, rect1, rect2, rect_out);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2544,14 +2341,6 @@ static void do_overdrop_effect(const SeqRenderData *context,
|
||||
/** \name Gaussian Blur
|
||||
* \{ */
|
||||
|
||||
/* NOTE: This gaussian blur implementation accumulates values in the square
|
||||
* kernel rather that doing X direction and then Y direction because of the
|
||||
* lack of using multiple-staged filters.
|
||||
*
|
||||
* Once we can we'll implement a way to apply filter as multiple stages we
|
||||
* can optimize hell of a lot in here.
|
||||
*/
|
||||
|
||||
static void init_gaussian_blur_effect(Sequence *seq)
|
||||
{
|
||||
if (seq->effectdata) {
|
||||
@@ -2585,346 +2374,92 @@ static int early_out_gaussian_blur(Sequence *seq, float /*fac*/)
|
||||
return EARLY_DO_EFFECT;
|
||||
}
|
||||
|
||||
/* TODO(sergey): De-duplicate with compositor. */
|
||||
static float *make_gaussian_blur_kernel(float rad, int size)
|
||||
static blender::Array<float> make_gaussian_blur_kernel(float rad, int size)
|
||||
{
|
||||
float *gausstab, sum, val;
|
||||
float fac;
|
||||
int i, n;
|
||||
int n = 2 * size + 1;
|
||||
blender::Array<float> gausstab(n);
|
||||
|
||||
n = 2 * size + 1;
|
||||
|
||||
gausstab = (float *)MEM_mallocN(sizeof(float) * n, __func__);
|
||||
|
||||
sum = 0.0f;
|
||||
fac = (rad > 0.0f ? 1.0f / rad : 0.0f);
|
||||
for (i = -size; i <= size; i++) {
|
||||
val = RE_filter_value(R_FILTER_GAUSS, float(i) * fac);
|
||||
float sum = 0.0f;
|
||||
float fac = (rad > 0.0f ? 1.0f / rad : 0.0f);
|
||||
for (int i = -size; i <= size; i++) {
|
||||
float val = RE_filter_value(R_FILTER_GAUSS, float(i) * fac);
|
||||
sum += val;
|
||||
gausstab[i + size] = val;
|
||||
}
|
||||
|
||||
sum = 1.0f / sum;
|
||||
for (i = 0; i < n; i++) {
|
||||
gausstab[i] *= sum;
|
||||
float inv_sum = 1.0f / sum;
|
||||
for (int i = 0; i < n; i++) {
|
||||
gausstab[i] *= inv_sum;
|
||||
}
|
||||
|
||||
return gausstab;
|
||||
}
|
||||
|
||||
static void do_gaussian_blur_effect_byte_x(Sequence *seq,
|
||||
int start_line,
|
||||
int x,
|
||||
int y,
|
||||
int frame_width,
|
||||
int /*frame_height*/,
|
||||
const uchar *rect,
|
||||
uchar *out)
|
||||
template<typename T>
|
||||
static void gaussian_blur_x(const blender::Array<float> &gausstab,
|
||||
int half_size,
|
||||
int start_line,
|
||||
int width,
|
||||
int height,
|
||||
int /*frame_height*/,
|
||||
const T *rect,
|
||||
T *dst)
|
||||
{
|
||||
#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4)
|
||||
GaussianBlurVars *data = static_cast<GaussianBlurVars *>(seq->effectdata);
|
||||
const int size_x = int(data->size_x + 0.5f);
|
||||
int i, j;
|
||||
|
||||
/* Make gaussian weight table. */
|
||||
float *gausstab_x;
|
||||
gausstab_x = make_gaussian_blur_kernel(data->size_x, size_x);
|
||||
|
||||
for (i = 0; i < y; i++) {
|
||||
for (j = 0; j < x; j++) {
|
||||
int out_index = INDEX(j, i);
|
||||
float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
dst += start_line * width * 4;
|
||||
for (int y = start_line; y < start_line + height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
float4 accum(0.0f);
|
||||
float accum_weight = 0.0f;
|
||||
|
||||
for (int current_x = j - size_x; current_x <= j + size_x; current_x++) {
|
||||
if (current_x < 0 || current_x >= frame_width) {
|
||||
/* Out of bounds. */
|
||||
continue;
|
||||
}
|
||||
int index = INDEX(current_x, i + start_line);
|
||||
float weight = gausstab_x[current_x - j + size_x];
|
||||
accum[0] += rect[index] * weight;
|
||||
accum[1] += rect[index + 1] * weight;
|
||||
accum[2] += rect[index + 2] * weight;
|
||||
accum[3] += rect[index + 3] * weight;
|
||||
int xmin = blender::math::max(x - half_size, 0);
|
||||
int xmax = blender::math::min(x + half_size, width - 1);
|
||||
for (int nx = xmin, index = (xmin - x) + half_size; nx <= xmax; nx++, index++) {
|
||||
float weight = gausstab[index];
|
||||
int offset = (y * width + nx) * 4;
|
||||
accum += float4(rect + offset) * weight;
|
||||
accum_weight += weight;
|
||||
}
|
||||
|
||||
float inv_accum_weight = 1.0f / accum_weight;
|
||||
out[out_index + 0] = accum[0] * inv_accum_weight;
|
||||
out[out_index + 1] = accum[1] * inv_accum_weight;
|
||||
out[out_index + 2] = accum[2] * inv_accum_weight;
|
||||
out[out_index + 3] = accum[3] * inv_accum_weight;
|
||||
accum *= (1.0f / accum_weight);
|
||||
dst[0] = accum[0];
|
||||
dst[1] = accum[1];
|
||||
dst[2] = accum[2];
|
||||
dst[3] = accum[3];
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
|
||||
MEM_freeN(gausstab_x);
|
||||
#undef INDEX
|
||||
}
|
||||
|
||||
static void do_gaussian_blur_effect_byte_y(Sequence *seq,
|
||||
int start_line,
|
||||
int x,
|
||||
int y,
|
||||
int /*frame_width*/,
|
||||
int frame_height,
|
||||
const uchar *rect,
|
||||
uchar *out)
|
||||
template<typename T>
|
||||
static void gaussian_blur_y(const blender::Array<float> &gausstab,
|
||||
int half_size,
|
||||
int start_line,
|
||||
int width,
|
||||
int height,
|
||||
int frame_height,
|
||||
const T *rect,
|
||||
T *dst)
|
||||
{
|
||||
#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4)
|
||||
GaussianBlurVars *data = static_cast<GaussianBlurVars *>(seq->effectdata);
|
||||
const int size_y = int(data->size_y + 0.5f);
|
||||
int i, j;
|
||||
|
||||
/* Make gaussian weight table. */
|
||||
float *gausstab_y;
|
||||
gausstab_y = make_gaussian_blur_kernel(data->size_y, size_y);
|
||||
|
||||
for (i = 0; i < y; i++) {
|
||||
for (j = 0; j < x; j++) {
|
||||
int out_index = INDEX(j, i);
|
||||
float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
dst += start_line * width * 4;
|
||||
for (int y = start_line; y < start_line + height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
float4 accum(0.0f);
|
||||
float accum_weight = 0.0f;
|
||||
for (int current_y = i - size_y; current_y <= i + size_y; current_y++) {
|
||||
if (current_y < -start_line || current_y + start_line >= frame_height) {
|
||||
/* Out of bounds. */
|
||||
continue;
|
||||
}
|
||||
int index = INDEX(j, current_y + start_line);
|
||||
float weight = gausstab_y[current_y - i + size_y];
|
||||
accum[0] += rect[index] * weight;
|
||||
accum[1] += rect[index + 1] * weight;
|
||||
accum[2] += rect[index + 2] * weight;
|
||||
accum[3] += rect[index + 3] * weight;
|
||||
int ymin = blender::math::max(y - half_size, 0);
|
||||
int ymax = blender::math::min(y + half_size, frame_height - 1);
|
||||
for (int ny = ymin, index = (ymin - y) + half_size; ny <= ymax; ny++, index++) {
|
||||
float weight = gausstab[index];
|
||||
int offset = (ny * width + x) * 4;
|
||||
accum += float4(rect + offset) * weight;
|
||||
accum_weight += weight;
|
||||
}
|
||||
float inv_accum_weight = 1.0f / accum_weight;
|
||||
out[out_index + 0] = accum[0] * inv_accum_weight;
|
||||
out[out_index + 1] = accum[1] * inv_accum_weight;
|
||||
out[out_index + 2] = accum[2] * inv_accum_weight;
|
||||
out[out_index + 3] = accum[3] * inv_accum_weight;
|
||||
accum *= (1.0f / accum_weight);
|
||||
dst[0] = accum[0];
|
||||
dst[1] = accum[1];
|
||||
dst[2] = accum[2];
|
||||
dst[3] = accum[3];
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
|
||||
MEM_freeN(gausstab_y);
|
||||
#undef INDEX
|
||||
}
|
||||
|
||||
static void do_gaussian_blur_effect_float_x(Sequence *seq,
|
||||
int start_line,
|
||||
int x,
|
||||
int y,
|
||||
int frame_width,
|
||||
int /*frame_height*/,
|
||||
float *rect,
|
||||
float *out)
|
||||
{
|
||||
#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4)
|
||||
GaussianBlurVars *data = static_cast<GaussianBlurVars *>(seq->effectdata);
|
||||
const int size_x = int(data->size_x + 0.5f);
|
||||
int i, j;
|
||||
|
||||
/* Make gaussian weight table. */
|
||||
float *gausstab_x;
|
||||
gausstab_x = make_gaussian_blur_kernel(data->size_x, size_x);
|
||||
|
||||
for (i = 0; i < y; i++) {
|
||||
for (j = 0; j < x; j++) {
|
||||
int out_index = INDEX(j, i);
|
||||
float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
float accum_weight = 0.0f;
|
||||
for (int current_x = j - size_x; current_x <= j + size_x; current_x++) {
|
||||
if (current_x < 0 || current_x >= frame_width) {
|
||||
/* Out of bounds. */
|
||||
continue;
|
||||
}
|
||||
int index = INDEX(current_x, i + start_line);
|
||||
float weight = gausstab_x[current_x - j + size_x];
|
||||
madd_v4_v4fl(accum, &rect[index], weight);
|
||||
accum_weight += weight;
|
||||
}
|
||||
mul_v4_v4fl(&out[out_index], accum, 1.0f / accum_weight);
|
||||
}
|
||||
}
|
||||
|
||||
MEM_freeN(gausstab_x);
|
||||
#undef INDEX
|
||||
}
|
||||
|
||||
static void do_gaussian_blur_effect_float_y(Sequence *seq,
|
||||
int start_line,
|
||||
int x,
|
||||
int y,
|
||||
int /*frame_width*/,
|
||||
int frame_height,
|
||||
float *rect,
|
||||
float *out)
|
||||
{
|
||||
#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4)
|
||||
GaussianBlurVars *data = static_cast<GaussianBlurVars *>(seq->effectdata);
|
||||
const int size_y = int(data->size_y + 0.5f);
|
||||
int i, j;
|
||||
|
||||
/* Make gaussian weight table. */
|
||||
float *gausstab_y;
|
||||
gausstab_y = make_gaussian_blur_kernel(data->size_y, size_y);
|
||||
|
||||
for (i = 0; i < y; i++) {
|
||||
for (j = 0; j < x; j++) {
|
||||
int out_index = INDEX(j, i);
|
||||
float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
float accum_weight = 0.0f;
|
||||
for (int current_y = i - size_y; current_y <= i + size_y; current_y++) {
|
||||
if (current_y < -start_line || current_y + start_line >= frame_height) {
|
||||
/* Out of bounds. */
|
||||
continue;
|
||||
}
|
||||
int index = INDEX(j, current_y + start_line);
|
||||
float weight = gausstab_y[current_y - i + size_y];
|
||||
madd_v4_v4fl(accum, &rect[index], weight);
|
||||
accum_weight += weight;
|
||||
}
|
||||
mul_v4_v4fl(&out[out_index], accum, 1.0f / accum_weight);
|
||||
}
|
||||
}
|
||||
|
||||
MEM_freeN(gausstab_y);
|
||||
#undef INDEX
|
||||
}
|
||||
|
||||
static void do_gaussian_blur_effect_x_cb(const SeqRenderData *context,
|
||||
Sequence *seq,
|
||||
ImBuf *ibuf,
|
||||
int start_line,
|
||||
int total_lines,
|
||||
ImBuf *out)
|
||||
{
|
||||
if (out->float_buffer.data) {
|
||||
float *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
|
||||
|
||||
slice_get_float_buffers(
|
||||
context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_gaussian_blur_effect_float_x(seq,
|
||||
start_line,
|
||||
context->rectx,
|
||||
total_lines,
|
||||
context->rectx,
|
||||
context->recty,
|
||||
ibuf->float_buffer.data,
|
||||
rect_out);
|
||||
}
|
||||
else {
|
||||
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
|
||||
|
||||
slice_get_byte_buffers(
|
||||
context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_gaussian_blur_effect_byte_x(seq,
|
||||
start_line,
|
||||
context->rectx,
|
||||
total_lines,
|
||||
context->rectx,
|
||||
context->recty,
|
||||
ibuf->byte_buffer.data,
|
||||
rect_out);
|
||||
}
|
||||
}
|
||||
|
||||
static void do_gaussian_blur_effect_y_cb(const SeqRenderData *context,
|
||||
Sequence *seq,
|
||||
ImBuf *ibuf,
|
||||
int start_line,
|
||||
int total_lines,
|
||||
ImBuf *out)
|
||||
{
|
||||
if (out->float_buffer.data) {
|
||||
float *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
|
||||
|
||||
slice_get_float_buffers(
|
||||
context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_gaussian_blur_effect_float_y(seq,
|
||||
start_line,
|
||||
context->rectx,
|
||||
total_lines,
|
||||
context->rectx,
|
||||
context->recty,
|
||||
ibuf->float_buffer.data,
|
||||
rect_out);
|
||||
}
|
||||
else {
|
||||
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
|
||||
|
||||
slice_get_byte_buffers(
|
||||
context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
|
||||
|
||||
do_gaussian_blur_effect_byte_y(seq,
|
||||
start_line,
|
||||
context->rectx,
|
||||
total_lines,
|
||||
context->rectx,
|
||||
context->recty,
|
||||
ibuf->byte_buffer.data,
|
||||
rect_out);
|
||||
}
|
||||
}
|
||||
|
||||
struct RenderGaussianBlurEffectInitData {
|
||||
const SeqRenderData *context;
|
||||
Sequence *seq;
|
||||
ImBuf *ibuf;
|
||||
ImBuf *out;
|
||||
};
|
||||
|
||||
struct RenderGaussianBlurEffectThread {
|
||||
const SeqRenderData *context;
|
||||
Sequence *seq;
|
||||
ImBuf *ibuf;
|
||||
ImBuf *out;
|
||||
int start_line, tot_line;
|
||||
};
|
||||
|
||||
static void render_effect_execute_init_handle(void *handle_v,
|
||||
int start_line,
|
||||
int tot_line,
|
||||
void *init_data_v)
|
||||
{
|
||||
RenderGaussianBlurEffectThread *handle = (RenderGaussianBlurEffectThread *)handle_v;
|
||||
RenderGaussianBlurEffectInitData *init_data = (RenderGaussianBlurEffectInitData *)init_data_v;
|
||||
|
||||
handle->context = init_data->context;
|
||||
handle->seq = init_data->seq;
|
||||
handle->ibuf = init_data->ibuf;
|
||||
handle->out = init_data->out;
|
||||
|
||||
handle->start_line = start_line;
|
||||
handle->tot_line = tot_line;
|
||||
}
|
||||
|
||||
static void *render_effect_execute_do_x_thread(void *thread_data_v)
|
||||
{
|
||||
RenderGaussianBlurEffectThread *thread_data = (RenderGaussianBlurEffectThread *)thread_data_v;
|
||||
do_gaussian_blur_effect_x_cb(thread_data->context,
|
||||
thread_data->seq,
|
||||
thread_data->ibuf,
|
||||
thread_data->start_line,
|
||||
thread_data->tot_line,
|
||||
thread_data->out);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static void *render_effect_execute_do_y_thread(void *thread_data_v)
|
||||
{
|
||||
RenderGaussianBlurEffectThread *thread_data = (RenderGaussianBlurEffectThread *)thread_data_v;
|
||||
do_gaussian_blur_effect_y_cb(thread_data->context,
|
||||
thread_data->seq,
|
||||
thread_data->ibuf,
|
||||
thread_data->start_line,
|
||||
thread_data->tot_line,
|
||||
thread_data->out);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static ImBuf *do_gaussian_blur_effect(const SeqRenderData *context,
|
||||
@@ -2935,32 +2470,75 @@ static ImBuf *do_gaussian_blur_effect(const SeqRenderData *context,
|
||||
ImBuf * /*ibuf2*/,
|
||||
ImBuf * /*ibuf3*/)
|
||||
{
|
||||
using namespace blender;
|
||||
|
||||
/* Create blur kernel weights. */
|
||||
const GaussianBlurVars *data = static_cast<const GaussianBlurVars *>(seq->effectdata);
|
||||
const int half_size_x = int(data->size_x + 0.5f);
|
||||
const int half_size_y = int(data->size_y + 0.5f);
|
||||
Array<float> gausstab_x = make_gaussian_blur_kernel(data->size_x, half_size_x);
|
||||
Array<float> gausstab_y = make_gaussian_blur_kernel(data->size_y, half_size_y);
|
||||
|
||||
const int width = context->rectx;
|
||||
const int height = context->recty;
|
||||
const bool is_float = ibuf1->float_buffer.data;
|
||||
|
||||
/* Horizontal blur: create output, blur ibuf1 into it. */
|
||||
ImBuf *out = prepare_effect_imbufs(context, ibuf1, nullptr, nullptr);
|
||||
threading::parallel_for(IndexRange(context->recty), 32, [&](const IndexRange y_range) {
|
||||
const int y_first = y_range.first();
|
||||
const int y_size = y_range.size();
|
||||
if (is_float) {
|
||||
gaussian_blur_x(gausstab_x,
|
||||
half_size_x,
|
||||
y_first,
|
||||
width,
|
||||
y_size,
|
||||
height,
|
||||
ibuf1->float_buffer.data,
|
||||
out->float_buffer.data);
|
||||
}
|
||||
else {
|
||||
gaussian_blur_x(gausstab_x,
|
||||
half_size_x,
|
||||
y_first,
|
||||
width,
|
||||
y_size,
|
||||
height,
|
||||
ibuf1->byte_buffer.data,
|
||||
out->byte_buffer.data);
|
||||
}
|
||||
});
|
||||
|
||||
RenderGaussianBlurEffectInitData init_data;
|
||||
|
||||
init_data.context = context;
|
||||
init_data.seq = seq;
|
||||
init_data.ibuf = ibuf1;
|
||||
init_data.out = out;
|
||||
|
||||
IMB_processor_apply_threaded(out->y,
|
||||
sizeof(RenderGaussianBlurEffectThread),
|
||||
&init_data,
|
||||
render_effect_execute_init_handle,
|
||||
render_effect_execute_do_x_thread);
|
||||
|
||||
/* Vertical blur: create output, blur previous output into it. */
|
||||
ibuf1 = out;
|
||||
init_data.ibuf = ibuf1;
|
||||
out = prepare_effect_imbufs(context, ibuf1, nullptr, nullptr);
|
||||
init_data.out = out;
|
||||
|
||||
IMB_processor_apply_threaded(out->y,
|
||||
sizeof(RenderGaussianBlurEffectThread),
|
||||
&init_data,
|
||||
render_effect_execute_init_handle,
|
||||
render_effect_execute_do_y_thread);
|
||||
threading::parallel_for(IndexRange(context->recty), 32, [&](const IndexRange y_range) {
|
||||
const int y_first = y_range.first();
|
||||
const int y_size = y_range.size();
|
||||
if (is_float) {
|
||||
gaussian_blur_y(gausstab_y,
|
||||
half_size_y,
|
||||
y_first,
|
||||
width,
|
||||
y_size,
|
||||
height,
|
||||
ibuf1->float_buffer.data,
|
||||
out->float_buffer.data);
|
||||
}
|
||||
else {
|
||||
gaussian_blur_y(gausstab_y,
|
||||
half_size_y,
|
||||
y_first,
|
||||
width,
|
||||
y_size,
|
||||
height,
|
||||
ibuf1->byte_buffer.data,
|
||||
out->byte_buffer.data);
|
||||
}
|
||||
});
|
||||
|
||||
/* Free the first output. */
|
||||
IMB_freeImBuf(ibuf1);
|
||||
|
||||
return out;
|
||||
|
||||
Reference in New Issue
Block a user