VSE: reduce effects code duplication, making gaussian blur faster in the process

Now that the code is in C++, quite a bit of duplication between the "byte" and
"float" effect code paths can be reduced (this is easier than it was when the
code was C). So I did that, removing about 400 lines of code.

In the process I accidentally made Gaussian Blur faster: while reducing the
amount of code I noticed it was doing some things sub-optimally (e.g.
recalculating the kernel tables for each job). Applying a 100x100 Gaussian
blur to a 4K UHD resolution image strip on a Ryzen 5950X went from 630ms to
450ms.

Pull Request: https://projects.blender.org/blender/blender/pulls/116089
This commit is contained in:
Aras Pranckevicius
2023-12-14 17:31:05 +01:00
committed by Aras Pranckevicius
parent 4a34dcbb69
commit 5cac8e2bb4

View File

@@ -64,6 +64,8 @@
#include "strip_time.hh"
#include "utils.hh"
using blender::float4;
static SeqEffectHandle get_sequence_effect_impl(int seq_type);
/* -------------------------------------------------------------------- */
@@ -120,6 +122,44 @@ static void slice_get_float_buffers(const SeqRenderData *context,
}
}
/* Byte overload: reads the pixel at `ptr` and returns it as a float4, converting it with
 * straight_uchar_to_premul_float(). */
static float4 load_premul_pixel(const uchar *ptr)
{
float4 res;
straight_uchar_to_premul_float(res, ptr);
return res;
}
/* Float overload: constructs the float4 directly from `ptr`, with no conversion call. */
static float4 load_premul_pixel(const float *ptr)
{
return float4(ptr);
}
/* Byte overload: writes `pix` to the byte pixel at `dst`, converting it with
 * premul_float_to_straight_uchar(). */
static void store_premul_pixel(const float4 &pix, uchar *dst)
{
premul_float_to_straight_uchar(dst, pix);
}
/* Float overload: writes `pix` to the float pixel at `dst` as-is, no conversion involved. */
static void store_premul_pixel(const float4 &pix, float *dst)
{
  /* Copy the whole float4 value into the destination pixel. */
  memcpy(dst, &pix, sizeof(pix));
}
/* Byte overload: fills the 4-channel pixel at `dst` with zero color and alpha 255. */
static void store_opaque_black_pixel(uchar *dst)
{
  const uchar opaque_black[4] = {0, 0, 0, 255};
  for (int channel = 0; channel < 4; channel++) {
    dst[channel] = opaque_black[channel];
  }
}
/* Float overload: fills the 4-channel pixel at `dst` with zero color and alpha 1.0. */
static void store_opaque_black_pixel(float *dst)
{
  for (int channel = 0; channel < 3; channel++) {
    dst[channel] = 0.0f;
  }
  dst[3] = 1.0f;
}
/** \} */
/* -------------------------------------------------------------------- */
@@ -203,72 +243,32 @@ static void init_alpha_over_or_under(Sequence *seq)
seq->seq1 = seq2;
}
static void do_alphaover_effect_byte(
float fac, int x, int y, uchar *rect1, uchar *rect2, uchar *out)
/* dst = src1 over src2 (alpha from src1).
 *
 * One implementation for both pixel formats: T is the channel type and every pixel has 4
 * channels. `width` x `height` pixels are processed; pixels are read with load_premul_pixel()
 * and written with store_premul_pixel(), blending is done on float4 values. */
template<typename T>
static void do_alphaover_effect(
float fac, int width, int height, const T *src1, const T *src2, T *dst)
{
uchar *cp1 = rect1;
uchar *cp2 = rect2;
uchar *rt = out;
for (int i = 0; i < y; i++) {
for (int j = 0; j < x; j++) {
/* rt = rt1 over rt2 (alpha from rt1) */
float tempc[4], rt1[4], rt2[4];
straight_uchar_to_premul_float(rt1, cp1);
straight_uchar_to_premul_float(rt2, cp2);
float mfac = 1.0f - fac * rt1[3];
if (fac <= 0.0f) {
*((uint *)rt) = *((uint *)cp2);
}
else if (mfac <= 0.0f) {
*((uint *)rt) = *((uint *)cp1);
}
else {
tempc[0] = fac * rt1[0] + mfac * rt2[0];
tempc[1] = fac * rt1[1] + mfac * rt2[1];
tempc[2] = fac * rt1[2] + mfac * rt2[2];
tempc[3] = fac * rt1[3] + mfac * rt2[3];
premul_float_to_straight_uchar(rt, tempc);
}
cp1 += 4;
cp2 += 4;
rt += 4;
}
/* A non-positive factor makes every output pixel equal to src2, so the test is done once
 * and the whole region is copied in a single memcpy instead of per pixel. */
if (fac <= 0.0f) {
memcpy(dst, src2, sizeof(T) * 4 * width * height);
return;
}
}
static void do_alphaover_effect_float(
float fac, int x, int y, float *rect1, float *rect2, float *out)
{
float *rt1 = rect1;
float *rt2 = rect2;
float *rt = out;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
float4 col1 = load_premul_pixel(src1);
float mfac = 1.0f - fac * col1.w;
for (int i = 0; i < y; i++) {
for (int j = 0; j < x; j++) {
/* rt = rt1 over rt2 (alpha from rt1) */
float mfac = 1.0f - (fac * rt1[3]);
if (fac <= 0.0f) {
memcpy(rt, rt2, sizeof(float[4]));
}
else if (mfac <= 0) {
memcpy(rt, rt1, sizeof(float[4]));
/* Nothing of src2 is left (its weight `mfac` is not positive): copy the src1 pixel
 * untouched, src2 is not even loaded. */
if (mfac <= 0.0f) {
memcpy(dst, src1, sizeof(T) * 4);
}
else {
rt[0] = fac * rt1[0] + mfac * rt2[0];
rt[1] = fac * rt1[1] + mfac * rt2[1];
rt[2] = fac * rt1[2] + mfac * rt2[2];
rt[3] = fac * rt1[3] + mfac * rt2[3];
float4 col2 = load_premul_pixel(src2);
float4 col = fac * col1 + mfac * col2;
store_premul_pixel(col, dst);
}
rt1 += 4;
rt2 += 4;
rt += 4;
src1 += 4;
src2 += 4;
dst += 4;
}
}
}
@@ -290,7 +290,7 @@ static void do_alphaover_effect(const SeqRenderData *context,
slice_get_float_buffers(
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_alphaover_effect_float(fac, context->rectx, total_lines, rect1, rect2, rect_out);
do_alphaover_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
}
else {
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
@@ -298,7 +298,7 @@ static void do_alphaover_effect(const SeqRenderData *context,
slice_get_byte_buffers(
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_alphaover_effect_byte(fac, context->rectx, total_lines, rect1, rect2, rect_out);
do_alphaover_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
}
}
@@ -308,88 +308,34 @@ static void do_alphaover_effect(const SeqRenderData *context,
/** \name Alpha Under Effect
* \{ */
static void do_alphaunder_effect_byte(
float fac, int x, int y, uchar *rect1, uchar *rect2, uchar *out)
/* dst = src1 under src2 (alpha from src2).
 *
 * One implementation for both pixel formats: T is the channel type and every pixel has 4
 * channels. `width` x `height` pixels are processed; pixels are read with load_premul_pixel()
 * and written with store_premul_pixel(), blending is done on float4 values. The branch
 * structure mirrors the byte/float code it replaces. */
template<typename T>
static void do_alphaunder_effect(
float fac, int width, int height, const T *src1, const T *src2, T *dst)
{
uchar *cp1 = rect1;
uchar *cp2 = rect2;
uchar *rt = out;
for (int i = 0; i < y; i++) {
for (int j = 0; j < x; j++) {
/* rt = rt1 under rt2 (alpha from rt2) */
float tempc[4], rt1[4], rt2[4];
straight_uchar_to_premul_float(rt1, cp1);
straight_uchar_to_premul_float(rt2, cp2);
/* this complex optimization is because the
* 'skybuf' can be crossed in
*/
if (rt2[3] <= 0.0f && fac >= 1.0f) {
*((uint *)rt) = *((uint *)cp1);
}
else if (rt2[3] >= 1.0f) {
*((uint *)rt) = *((uint *)cp2);
}
else {
float temp_fac = (fac * (1.0f - rt2[3]));
if (fac <= 0) {
*((uint *)rt) = *((uint *)cp2);
}
else {
tempc[0] = (temp_fac * rt1[0] + rt2[0]);
tempc[1] = (temp_fac * rt1[1] + rt2[1]);
tempc[2] = (temp_fac * rt1[2] + rt2[2]);
tempc[3] = (temp_fac * rt1[3] + rt2[3]);
premul_float_to_straight_uchar(rt, tempc);
}
}
cp1 += 4;
cp2 += 4;
rt += 4;
}
/* A non-positive factor means src1 contributes nothing, so every output pixel is src2
 * (the replaced code reached the same result through its per-pixel `fac <= 0` branch).
 * Copy the whole region at once. Note that `fac >= 1` must NOT early-out: src2 still has
 * to be composited on top of src1. */
if (fac <= 0.0f) {
memcpy(dst, src2, sizeof(T) * 4 * width * height);
return;
}
}
static void do_alphaunder_effect_float(
float fac, int x, int y, float *rect1, float *rect2, float *out)
{
float *rt1 = rect1;
float *rt2 = rect2;
float *rt = out;
for (int i = 0; i < y; i++) {
for (int j = 0; j < x; j++) {
/* rt = rt1 under rt2 (alpha from rt2) */
/* this complex optimization is because the
* 'skybuf' can be crossed in
*/
if (rt2[3] <= 0 && fac >= 1.0f) {
memcpy(rt, rt1, sizeof(float[4]));
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
float4 col2 = load_premul_pixel(src2);
/* src2 is fully transparent and src1 is at full strength: the result is src1 verbatim.
 * The `fac >= 1.0f` test is required, otherwise a partially faded src1 would be copied
 * unscaled instead of going through the blend below. */
if (col2.w <= 0.0f && fac >= 1.0f) {
memcpy(dst, src1, sizeof(T) * 4);
}
else if (rt2[3] >= 1.0f) {
memcpy(rt, rt2, sizeof(float[4]));
/* Opaque src2 hides src1 completely. */
else if (col2.w >= 1.0f) {
memcpy(dst, src2, sizeof(T) * 4);
}
else {
float temp_fac = fac * (1.0f - rt2[3]);
if (fac == 0) {
memcpy(rt, rt2, sizeof(float[4]));
}
else {
rt[0] = temp_fac * rt1[0] + rt2[0];
rt[1] = temp_fac * rt1[1] + rt2[1];
rt[2] = temp_fac * rt1[2] + rt2[2];
rt[3] = temp_fac * rt1[3] + rt2[3];
}
/* General case: src1 is weighted by the factor and by the coverage src2 leaves free,
 * then src2 is added on top. */
float mfac = fac * (1.0f - col2.w);
float4 col1 = load_premul_pixel(src1);
float4 col = mfac * col1 + col2;
store_premul_pixel(col, dst);
}
rt1 += 4;
rt2 += 4;
rt += 4;
src1 += 4;
src2 += 4;
dst += 4;
}
}
}
@@ -411,7 +357,7 @@ static void do_alphaunder_effect(const SeqRenderData *context,
slice_get_float_buffers(
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_alphaunder_effect_float(fac, context->rectx, total_lines, rect1, rect2, rect_out);
do_alphaunder_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
}
else {
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
@@ -419,7 +365,7 @@ static void do_alphaunder_effect(const SeqRenderData *context,
slice_get_byte_buffers(
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_alphaunder_effect_byte(fac, context->rectx, total_lines, rect1, rect2, rect_out);
do_alphaunder_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
}
}
@@ -527,53 +473,24 @@ static float invGammaCorrect(float c)
return sqrtf_signed(c);
}
static void do_gammacross_effect_byte(
float fac, int x, int y, uchar *rect1, uchar *rect2, uchar *out)
/* Cross-fade from src1 to src2: each of the 4 channels is passed through invGammaCorrect(),
 * mixed with weights (1 - fac) and fac, and passed back through gammaCorrect().
 *
 * One implementation for both pixel formats: T is the channel type, `width` x `height` pixels
 * are processed, pixels are read with load_premul_pixel() and written with
 * store_premul_pixel(). */
template<typename T>
static void do_gammacross_effect(
float fac, int width, int height, const T *src1, const T *src2, T *dst)
{
uchar *cp1 = rect1;
uchar *cp2 = rect2;
uchar *rt = out;
float mfac = 1.0f - fac;
for (int i = 0; i < y; i++) {
for (int j = 0; j < x; j++) {
float rt1[4], rt2[4], tempc[4];
straight_uchar_to_premul_float(rt1, cp1);
straight_uchar_to_premul_float(rt2, cp2);
tempc[0] = gammaCorrect(mfac * invGammaCorrect(rt1[0]) + fac * invGammaCorrect(rt2[0]));
tempc[1] = gammaCorrect(mfac * invGammaCorrect(rt1[1]) + fac * invGammaCorrect(rt2[1]));
tempc[2] = gammaCorrect(mfac * invGammaCorrect(rt1[2]) + fac * invGammaCorrect(rt2[2]));
tempc[3] = gammaCorrect(mfac * invGammaCorrect(rt1[3]) + fac * invGammaCorrect(rt2[3]));
premul_float_to_straight_uchar(rt, tempc);
cp1 += 4;
cp2 += 4;
rt += 4;
}
}
}
static void do_gammacross_effect_float(
float fac, int x, int y, float *rect1, float *rect2, float *out)
{
float *rt1 = rect1;
float *rt2 = rect2;
float *rt = out;
float mfac = 1.0f - fac;
for (int i = 0; i < y; i++) {
for (int j = 0; j < x; j++) {
rt[0] = gammaCorrect(mfac * invGammaCorrect(rt1[0]) + fac * invGammaCorrect(rt2[0]));
rt[1] = gammaCorrect(mfac * invGammaCorrect(rt1[1]) + fac * invGammaCorrect(rt2[1]));
rt[2] = gammaCorrect(mfac * invGammaCorrect(rt1[2]) + fac * invGammaCorrect(rt2[2]));
rt[3] = gammaCorrect(mfac * invGammaCorrect(rt1[3]) + fac * invGammaCorrect(rt2[3]));
rt1 += 4;
rt2 += 4;
rt += 4;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
float4 col1 = load_premul_pixel(src1);
float4 col2 = load_premul_pixel(src2);
float4 col;
/* Same formula for all 4 channels, alpha included. */
for (int c = 0; c < 4; ++c) {
col[c] = gammaCorrect(mfac * invGammaCorrect(col1[c]) + fac * invGammaCorrect(col2[c]));
}
store_premul_pixel(col, dst);
src1 += 4;
src2 += 4;
dst += 4;
}
}
}
@@ -604,7 +521,7 @@ static void do_gammacross_effect(const SeqRenderData *context,
slice_get_float_buffers(
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_gammacross_effect_float(fac, context->rectx, total_lines, rect1, rect2, rect_out);
do_gammacross_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
}
else {
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
@@ -612,7 +529,7 @@ static void do_gammacross_effect(const SeqRenderData *context,
slice_get_byte_buffers(
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_gammacross_effect_byte(fac, context->rectx, total_lines, rect1, rect2, rect_out);
do_gammacross_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out);
}
}
@@ -943,57 +860,21 @@ static void do_mul_effect(const SeqRenderData *context,
/** \name Blend Mode Effect
* \{ */
using IMB_blend_func_byte = void (*)(uchar *dst, const uchar *src1, const uchar *src2);
using IMB_blend_func_float = void (*)(float *dst, const float *src1, const float *src2);
BLI_INLINE void apply_blend_function_byte(float fac,
int x,
int y,
uchar *rect1,
uchar *rect2,
uchar *out,
IMB_blend_func_byte blend_function)
/* Apply `blend_function` to every pixel of a `width` x `height` region (4 channels of type T
 * per pixel), using `fac` to scale the alpha of src2 for the duration of each call.
 *
 * blend_function has to be: void (T* dst, const T *src1, const T *src2)
 *
 * src2 is taken as a non-const pointer because its alpha channel is temporarily overwritten
 * and then restored around each call. */
template<typename T, typename Func>
static void apply_blend_function(
float fac, int width, int height, const T *src1, T *src2, T *dst, Func blend_function)
{
uchar *rt1 = rect1;
uchar *rt2 = rect2;
uchar *rt = out;
for (int i = 0; i < y; i++) {
for (int j = 0; j < x; j++) {
uint achannel = rt2[3];
rt2[3] = uint(achannel) * fac;
blend_function(rt, rt1, rt2);
rt2[3] = achannel;
rt[3] = rt1[3];
rt1 += 4;
rt2 += 4;
rt += 4;
}
}
}
BLI_INLINE void apply_blend_function_float(float fac,
int x,
int y,
float *rect1,
float *rect2,
float *out,
IMB_blend_func_float blend_function)
{
float *rt1 = rect1;
float *rt2 = rect2;
float *rt = out;
for (int i = 0; i < y; i++) {
for (int j = 0; j < x; j++) {
float achannel = rt2[3];
rt2[3] = achannel * fac;
blend_function(rt, rt1, rt2);
rt2[3] = achannel;
rt[3] = rt1[3];
rt1 += 4;
rt2 += 4;
rt += 4;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
/* Remember the original alpha of src2 so it can be put back after blending. */
T achannel = src2[3];
src2[3] = T(achannel * fac);
blend_function(dst, src1, src2);
src2[3] = achannel;
/* The output always keeps the alpha of src1, whatever the blend function wrote. */
dst[3] = src1[3];
src1 += 4;
src2 += 4;
dst += 4;
}
}
}
@@ -1003,67 +884,67 @@ static void do_blend_effect_float(
{
switch (btype) {
case SEQ_TYPE_ADD:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_add_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_add_float);
break;
case SEQ_TYPE_SUB:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_sub_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_sub_float);
break;
case SEQ_TYPE_MUL:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_mul_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_mul_float);
break;
case SEQ_TYPE_DARKEN:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_darken_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_darken_float);
break;
case SEQ_TYPE_COLOR_BURN:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_burn_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_burn_float);
break;
case SEQ_TYPE_LINEAR_BURN:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_linearburn_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearburn_float);
break;
case SEQ_TYPE_SCREEN:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_screen_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_screen_float);
break;
case SEQ_TYPE_LIGHTEN:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_lighten_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_lighten_float);
break;
case SEQ_TYPE_DODGE:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_dodge_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_dodge_float);
break;
case SEQ_TYPE_OVERLAY:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_overlay_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_overlay_float);
break;
case SEQ_TYPE_SOFT_LIGHT:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_softlight_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_softlight_float);
break;
case SEQ_TYPE_HARD_LIGHT:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_hardlight_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hardlight_float);
break;
case SEQ_TYPE_PIN_LIGHT:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_pinlight_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_pinlight_float);
break;
case SEQ_TYPE_LIN_LIGHT:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_linearlight_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearlight_float);
break;
case SEQ_TYPE_VIVID_LIGHT:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_vividlight_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_vividlight_float);
break;
case SEQ_TYPE_BLEND_COLOR:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_color_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_color_float);
break;
case SEQ_TYPE_HUE:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_hue_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hue_float);
break;
case SEQ_TYPE_SATURATION:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_saturation_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_saturation_float);
break;
case SEQ_TYPE_VALUE:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_luminosity_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_luminosity_float);
break;
case SEQ_TYPE_DIFFERENCE:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_difference_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_difference_float);
break;
case SEQ_TYPE_EXCLUSION:
apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_exclusion_float);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_exclusion_float);
break;
default:
break;
@@ -1075,67 +956,67 @@ static void do_blend_effect_byte(
{
switch (btype) {
case SEQ_TYPE_ADD:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_add_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_add_byte);
break;
case SEQ_TYPE_SUB:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_sub_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_sub_byte);
break;
case SEQ_TYPE_MUL:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_mul_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_mul_byte);
break;
case SEQ_TYPE_DARKEN:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_darken_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_darken_byte);
break;
case SEQ_TYPE_COLOR_BURN:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_burn_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_burn_byte);
break;
case SEQ_TYPE_LINEAR_BURN:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_linearburn_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearburn_byte);
break;
case SEQ_TYPE_SCREEN:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_screen_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_screen_byte);
break;
case SEQ_TYPE_LIGHTEN:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_lighten_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_lighten_byte);
break;
case SEQ_TYPE_DODGE:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_dodge_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_dodge_byte);
break;
case SEQ_TYPE_OVERLAY:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_overlay_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_overlay_byte);
break;
case SEQ_TYPE_SOFT_LIGHT:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_softlight_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_softlight_byte);
break;
case SEQ_TYPE_HARD_LIGHT:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_hardlight_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hardlight_byte);
break;
case SEQ_TYPE_PIN_LIGHT:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_pinlight_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_pinlight_byte);
break;
case SEQ_TYPE_LIN_LIGHT:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_linearlight_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearlight_byte);
break;
case SEQ_TYPE_VIVID_LIGHT:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_vividlight_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_vividlight_byte);
break;
case SEQ_TYPE_BLEND_COLOR:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_color_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_color_byte);
break;
case SEQ_TYPE_HUE:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_hue_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hue_byte);
break;
case SEQ_TYPE_SATURATION:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_saturation_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_saturation_byte);
break;
case SEQ_TYPE_VALUE:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_luminosity_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_luminosity_byte);
break;
case SEQ_TYPE_DIFFERENCE:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_difference_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_difference_byte);
break;
case SEQ_TYPE_EXCLUSION:
apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_exclusion_byte);
apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_exclusion_byte);
break;
default:
break;
@@ -1516,58 +1397,38 @@ static void copy_wipe_effect(Sequence *dst, Sequence *src, const int /*flag*/)
dst->effectdata = MEM_dupallocN(src->effectdata);
}
static void do_wipe_effect_byte(const Sequence *seq,
float fac,
int width,
int height,
const uchar *rect1,
const uchar *rect2,
uchar *out)
template<typename T>
static void do_wipe_effect(
const Sequence *seq, float fac, int width, int height, const T *rect1, const T *rect2, T *out)
{
using namespace blender;
const WipeVars *wipe = (const WipeVars *)seq->effectdata;
const WipeZone wipezone = precalc_wipe_zone(wipe, width, height);
threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) {
const uchar *cp1 = rect1 + y_range.first() * width * 4;
const uchar *cp2 = rect2 + y_range.first() * width * 4;
uchar *rt = out + y_range.first() * width * 4;
const T *cp1 = rect1 ? rect1 + y_range.first() * width * 4 : nullptr;
const T *cp2 = rect2 ? rect2 + y_range.first() * width * 4 : nullptr;
T *rt = out + y_range.first() * width * 4;
for (const int y : y_range) {
for (int x = 0; x < width; x++) {
float check = check_zone(&wipezone, x, y, fac);
if (check) {
if (cp1) {
float rt1[4], rt2[4], tempc[4];
straight_uchar_to_premul_float(rt1, cp1);
straight_uchar_to_premul_float(rt2, cp2);
tempc[0] = rt1[0] * check + rt2[0] * (1 - check);
tempc[1] = rt1[1] * check + rt2[1] * (1 - check);
tempc[2] = rt1[2] * check + rt2[2] * (1 - check);
tempc[3] = rt1[3] * check + rt2[3] * (1 - check);
premul_float_to_straight_uchar(rt, tempc);
float4 col1 = load_premul_pixel(cp1);
float4 col2 = load_premul_pixel(cp2);
float4 col = col1 * check + col2 * (1.0f - check);
store_premul_pixel(col, rt);
}
else {
rt[0] = 0;
rt[1] = 0;
rt[2] = 0;
rt[3] = 255;
store_opaque_black_pixel(rt);
}
}
else {
if (cp2) {
rt[0] = cp2[0];
rt[1] = cp2[1];
rt[2] = cp2[2];
rt[3] = cp2[3];
memcpy(rt, cp2, sizeof(T) * 4);
}
else {
rt[0] = 0;
rt[1] = 0;
rt[2] = 0;
rt[3] = 255;
store_opaque_black_pixel(rt);
}
}
@@ -1583,66 +1444,6 @@ static void do_wipe_effect_byte(const Sequence *seq,
});
}
/* Wipe effect on float buffers (4 floats per pixel, `width` x `height` pixels).
 *
 * For every pixel check_zone() returns a weight: when it is non-zero the output is
 * rect1 * weight + rect2 * (1 - weight), when it is zero the rect2 pixel is copied. A missing
 * input is replaced by (0, 0, 0, 1). Rows are distributed with threading::parallel_for() using
 * a grain size of 64.
 *
 * NOTE(review): rt1/rt2 are computed by offsetting rect1/rect2 without a null check, yet they
 * are compared against nullptr further down. */
static void do_wipe_effect_float(Sequence *seq,
float fac,
int width,
int height,
const float *rect1,
const float *rect2,
float *out)
{
using namespace blender;
const WipeVars *wipe = (const WipeVars *)seq->effectdata;
const WipeZone wipezone = precalc_wipe_zone(wipe, width, height);
threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) {
/* Start of the first row handled by this task. */
const float *rt1 = rect1 + y_range.first() * width * 4;
const float *rt2 = rect2 + y_range.first() * width * 4;
float *rt = out + y_range.first() * width * 4;
for (const int y : y_range) {
for (int x = 0; x < width; x++) {
float check = check_zone(&wipezone, x, y, fac);
if (check) {
if (rt1) {
rt[0] = rt1[0] * check + rt2[0] * (1 - check);
rt[1] = rt1[1] * check + rt2[1] * (1 - check);
rt[2] = rt1[2] * check + rt2[2] * (1 - check);
rt[3] = rt1[3] * check + rt2[3] * (1 - check);
}
else {
rt[0] = 0;
rt[1] = 0;
rt[2] = 0;
rt[3] = 1.0;
}
}
else {
if (rt2) {
rt[0] = rt2[0];
rt[1] = rt2[1];
rt[2] = rt2[2];
rt[3] = rt2[3];
}
else {
rt[0] = 0;
rt[1] = 0;
rt[2] = 0;
rt[3] = 1.0;
}
}
rt += 4;
if (rt1 != nullptr) {
rt1 += 4;
}
if (rt2 != nullptr) {
rt2 += 4;
}
}
}
});
}
static ImBuf *do_wipe_effect(const SeqRenderData *context,
Sequence *seq,
float /*timeline_frame*/,
@@ -1654,22 +1455,22 @@ static ImBuf *do_wipe_effect(const SeqRenderData *context,
ImBuf *out = prepare_effect_imbufs(context, ibuf1, ibuf2, ibuf3);
if (out->float_buffer.data) {
do_wipe_effect_float(seq,
fac,
context->rectx,
context->recty,
ibuf1->float_buffer.data,
ibuf2->float_buffer.data,
out->float_buffer.data);
do_wipe_effect(seq,
fac,
context->rectx,
context->recty,
ibuf1->float_buffer.data,
ibuf2->float_buffer.data,
out->float_buffer.data);
}
else {
do_wipe_effect_byte(seq,
fac,
context->rectx,
context->recty,
ibuf1->byte_buffer.data,
ibuf2->byte_buffer.data,
out->byte_buffer.data);
do_wipe_effect(seq,
fac,
context->rectx,
context->recty,
ibuf1->byte_buffer.data,
ibuf2->byte_buffer.data,
out->byte_buffer.data);
}
return out;
@@ -1837,12 +1638,8 @@ static void do_transform_effect(const SeqRenderData *context,
/** \name Glow Effect
* \{ */
static void glow_blur_bitmap(const blender::float4 *src,
blender::float4 *map,
int width,
int height,
float blur,
int quality)
static void glow_blur_bitmap(
const float4 *src, float4 *map, int width, int height, float blur, int quality)
{
using namespace blender;
@@ -1913,8 +1710,8 @@ static void glow_blur_bitmap(const blender::float4 *src,
});
}
static void blur_isolate_highlights(const blender::float4 *in,
blender::float4 *out,
static void blur_isolate_highlights(const float4 *in,
float4 *out,
int width,
int height,
float threshold,
@@ -2525,7 +2322,7 @@ static void do_overdrop_effect(const SeqRenderData *context,
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_drop_effect_float(fac, x, y, rect1, rect2, rect_out);
do_alphaover_effect_float(fac, x, y, rect1, rect2, rect_out);
do_alphaover_effect(fac, x, y, rect1, rect2, rect_out);
}
else {
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
@@ -2534,7 +2331,7 @@ static void do_overdrop_effect(const SeqRenderData *context,
context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_drop_effect_byte(fac, x, y, rect1, rect2, rect_out);
do_alphaover_effect_byte(fac, x, y, rect1, rect2, rect_out);
do_alphaover_effect(fac, x, y, rect1, rect2, rect_out);
}
}
@@ -2544,14 +2341,6 @@ static void do_overdrop_effect(const SeqRenderData *context,
/** \name Gaussian Blur
* \{ */
/* NOTE: This gaussian blur implementation accumulates values in the square
* kernel rather than doing X direction and then Y direction because of the
* lack of using multiple-staged filters.
*
* Once we implement a way to apply the filter in multiple stages, we
* can optimize a hell of a lot in here.
*/
static void init_gaussian_blur_effect(Sequence *seq)
{
if (seq->effectdata) {
@@ -2585,346 +2374,92 @@ static int early_out_gaussian_blur(Sequence *seq, float /*fac*/)
return EARLY_DO_EFFECT;
}
/* TODO(sergey): De-duplicate with compositor. */
/* Build a normalized 1D blur kernel with `2 * size + 1` weights.
 *
 * Entry `i + size` holds RE_filter_value(R_FILTER_GAUSS, i / rad) for i in [-size, size]; all
 * entries are then divided by their sum, so the table adds up to 1. */
static float *make_gaussian_blur_kernel(float rad, int size)
static blender::Array<float> make_gaussian_blur_kernel(float rad, int size)
{
float *gausstab, sum, val;
float fac;
int i, n;
int n = 2 * size + 1;
blender::Array<float> gausstab(n);
n = 2 * size + 1;
gausstab = (float *)MEM_mallocN(sizeof(float) * n, __func__);
sum = 0.0f;
fac = (rad > 0.0f ? 1.0f / rad : 0.0f);
for (i = -size; i <= size; i++) {
val = RE_filter_value(R_FILTER_GAUSS, float(i) * fac);
float sum = 0.0f;
/* A non-positive radius gives fac = 0: every tap is then evaluated at position 0 and gets
 * the same weight. */
float fac = (rad > 0.0f ? 1.0f / rad : 0.0f);
for (int i = -size; i <= size; i++) {
float val = RE_filter_value(R_FILTER_GAUSS, float(i) * fac);
sum += val;
gausstab[i + size] = val;
}
sum = 1.0f / sum;
for (i = 0; i < n; i++) {
gausstab[i] *= sum;
/* Normalize with one division followed by multiplications. */
float inv_sum = 1.0f / sum;
for (int i = 0; i < n; i++) {
gausstab[i] *= inv_sum;
}
return gausstab;
}
static void do_gaussian_blur_effect_byte_x(Sequence *seq,
int start_line,
int x,
int y,
int frame_width,
int /*frame_height*/,
const uchar *rect,
uchar *out)
/* Horizontal blur pass over rows [start_line, start_line + height).
 *
 * `gausstab` is the precomputed weight table and `half_size` the kernel half-width; the table
 * is indexed from 0 to 2 * half_size (presumably the output of make_gaussian_blur_kernel() -
 * confirm at the caller). `rect` is addressed with absolute row numbers and `dst` is advanced
 * by `start_line` rows before writing, so both are expected to point at the first row of the
 * image. T is the channel type, 4 channels per pixel. */
template<typename T>
static void gaussian_blur_x(const blender::Array<float> &gausstab,
int half_size,
int start_line,
int width,
int height,
int /*frame_height*/,
const T *rect,
T *dst)
{
#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4)
GaussianBlurVars *data = static_cast<GaussianBlurVars *>(seq->effectdata);
const int size_x = int(data->size_x + 0.5f);
int i, j;
/* Make gaussian weight table. */
float *gausstab_x;
gausstab_x = make_gaussian_blur_kernel(data->size_x, size_x);
for (i = 0; i < y; i++) {
for (j = 0; j < x; j++) {
int out_index = INDEX(j, i);
float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
dst += start_line * width * 4;
for (int y = start_line; y < start_line + height; y++) {
for (int x = 0; x < width; x++) {
float4 accum(0.0f);
float accum_weight = 0.0f;
for (int current_x = j - size_x; current_x <= j + size_x; current_x++) {
if (current_x < 0 || current_x >= frame_width) {
/* Out of bounds. */
continue;
}
int index = INDEX(current_x, i + start_line);
float weight = gausstab_x[current_x - j + size_x];
accum[0] += rect[index] * weight;
accum[1] += rect[index + 1] * weight;
accum[2] += rect[index + 2] * weight;
accum[3] += rect[index + 3] * weight;
/* Clamp the tap range to the row up front so the inner loop needs no bounds test;
 * `index` starts at the table entry that corresponds to `xmin`. */
int xmin = blender::math::max(x - half_size, 0);
int xmax = blender::math::min(x + half_size, width - 1);
for (int nx = xmin, index = (xmin - x) + half_size; nx <= xmax; nx++, index++) {
float weight = gausstab[index];
int offset = (y * width + nx) * 4;
accum += float4(rect + offset) * weight;
accum_weight += weight;
}
float inv_accum_weight = 1.0f / accum_weight;
out[out_index + 0] = accum[0] * inv_accum_weight;
out[out_index + 1] = accum[1] * inv_accum_weight;
out[out_index + 2] = accum[2] * inv_accum_weight;
out[out_index + 3] = accum[3] * inv_accum_weight;
/* Divide by the weights actually used: near the borders part of the kernel is cut off. */
accum *= (1.0f / accum_weight);
dst[0] = accum[0];
dst[1] = accum[1];
dst[2] = accum[2];
dst[3] = accum[3];
dst += 4;
}
}
MEM_freeN(gausstab_x);
#undef INDEX
}
static void do_gaussian_blur_effect_byte_y(Sequence *seq,
int start_line,
int x,
int y,
int /*frame_width*/,
int frame_height,
const uchar *rect,
uchar *out)
/* Vertical blur pass over rows [start_line, start_line + height).
 *
 * `gausstab` is the precomputed weight table and `half_size` the kernel half-width; the table
 * is indexed from 0 to 2 * half_size (presumably the output of make_gaussian_blur_kernel() -
 * confirm at the caller). Taps are taken from rows clamped to [0, frame_height - 1], so `rect`
 * is read outside the processed row range. `rect` is addressed with absolute row numbers and
 * `dst` is advanced by `start_line` rows before writing. T is the channel type, 4 channels per
 * pixel. */
template<typename T>
static void gaussian_blur_y(const blender::Array<float> &gausstab,
int half_size,
int start_line,
int width,
int height,
int frame_height,
const T *rect,
T *dst)
{
#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4)
GaussianBlurVars *data = static_cast<GaussianBlurVars *>(seq->effectdata);
const int size_y = int(data->size_y + 0.5f);
int i, j;
/* Make gaussian weight table. */
float *gausstab_y;
gausstab_y = make_gaussian_blur_kernel(data->size_y, size_y);
for (i = 0; i < y; i++) {
for (j = 0; j < x; j++) {
int out_index = INDEX(j, i);
float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
dst += start_line * width * 4;
for (int y = start_line; y < start_line + height; y++) {
for (int x = 0; x < width; x++) {
float4 accum(0.0f);
float accum_weight = 0.0f;
for (int current_y = i - size_y; current_y <= i + size_y; current_y++) {
if (current_y < -start_line || current_y + start_line >= frame_height) {
/* Out of bounds. */
continue;
}
int index = INDEX(j, current_y + start_line);
float weight = gausstab_y[current_y - i + size_y];
accum[0] += rect[index] * weight;
accum[1] += rect[index + 1] * weight;
accum[2] += rect[index + 2] * weight;
accum[3] += rect[index + 3] * weight;
/* Clamp the tap range to the frame up front so the inner loop needs no bounds test;
 * `index` starts at the table entry that corresponds to `ymin`. */
int ymin = blender::math::max(y - half_size, 0);
int ymax = blender::math::min(y + half_size, frame_height - 1);
for (int ny = ymin, index = (ymin - y) + half_size; ny <= ymax; ny++, index++) {
float weight = gausstab[index];
int offset = (ny * width + x) * 4;
accum += float4(rect + offset) * weight;
accum_weight += weight;
}
float inv_accum_weight = 1.0f / accum_weight;
out[out_index + 0] = accum[0] * inv_accum_weight;
out[out_index + 1] = accum[1] * inv_accum_weight;
out[out_index + 2] = accum[2] * inv_accum_weight;
out[out_index + 3] = accum[3] * inv_accum_weight;
/* Divide by the weights actually used: near the borders part of the kernel is cut off. */
accum *= (1.0f / accum_weight);
dst[0] = accum[0];
dst[1] = accum[1];
dst[2] = accum[2];
dst[3] = accum[3];
dst += 4;
}
}
MEM_freeN(gausstab_y);
#undef INDEX
}
/* Horizontal gaussian blur pass on a float buffer (4 floats per pixel).
 *
 * Writes `y` rows of `x` pixels to `out`, indexed from row 0, while reading `rect` at rows
 * offset by `start_line`. The weight table is built from the strip's GaussianBlurVars on every
 * call and freed before returning. */
static void do_gaussian_blur_effect_float_x(Sequence *seq,
int start_line,
int x,
int y,
int frame_width,
int /*frame_height*/,
float *rect,
float *out)
{
/* Offset of the first channel of pixel (_x, _y) in a buffer that is `x` pixels wide. */
#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4)
GaussianBlurVars *data = static_cast<GaussianBlurVars *>(seq->effectdata);
/* Kernel half-width: the blur size rounded to the nearest integer. */
const int size_x = int(data->size_x + 0.5f);
int i, j;
/* Make gaussian weight table. */
float *gausstab_x;
gausstab_x = make_gaussian_blur_kernel(data->size_x, size_x);
for (i = 0; i < y; i++) {
for (j = 0; j < x; j++) {
int out_index = INDEX(j, i);
float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
float accum_weight = 0.0f;
for (int current_x = j - size_x; current_x <= j + size_x; current_x++) {
if (current_x < 0 || current_x >= frame_width) {
/* Out of bounds. */
continue;
}
int index = INDEX(current_x, i + start_line);
float weight = gausstab_x[current_x - j + size_x];
madd_v4_v4fl(accum, &rect[index], weight);
accum_weight += weight;
}
/* Normalize by the weights actually accumulated (taps outside the row were skipped). */
mul_v4_v4fl(&out[out_index], accum, 1.0f / accum_weight);
}
}
MEM_freeN(gausstab_x);
#undef INDEX
}
/**
 * Vertical gaussian blur pass for float images (4 floats per pixel).
 *
 * For every pixel of the processed rows, the weighted sum of the vertically neighboring source
 * pixels is computed and divided by the sum of the weights that were actually used, so pixels
 * close to the top/bottom border are normalized over the in-range taps only.
 *
 * \param seq: Strip whose `effectdata` is read as #GaussianBlurVars (only `size_y` is used).
 * \param start_line: Row offset added to the local row when reading from `rect`.
 * \param x: Pixels per row; used as the row stride of both `rect` and `out`.
 * \param y: Number of rows to process.
 * \param frame_height: Taps at rows outside `[0, frame_height)` of `rect` are left out.
 * \param rect: Source pixels, addressed by absolute row (local row plus `start_line`).
 * \param out: Destination pixels; local row `row` is written to row `row` of this buffer.
 */
static void do_gaussian_blur_effect_float_y(Sequence *seq,
                                            int start_line,
                                            int x,
                                            int y,
                                            int /*frame_width*/,
                                            int frame_height,
                                            float *rect,
                                            float *out)
{
  const GaussianBlurVars *blur = static_cast<GaussianBlurVars *>(seq->effectdata);
  const int radius = int(blur->size_y + 0.5f);

  /* Weight table: the tap at vertical offset `d` from the pixel lives at `d + radius`. */
  float *weights = make_gaussian_blur_kernel(blur->size_y, radius);

  for (int row = 0; row < y; row++) {
    /* Row of the pixel being blurred, expressed in source buffer rows. */
    const int center_row = row + start_line;
    /* Restrict the taps to the part of the kernel that overlaps the frame. */
    const int tap_first = (center_row - radius < 0) ? 0 : center_row - radius;
    const int tap_last = (center_row + radius >= frame_height) ? frame_height - 1 :
                                                                 center_row + radius;
    const int dst_row = row * x;

    for (int col = 0; col < x; col++) {
      float sum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
      float weight_sum = 0.0f;
      for (int tap_y = tap_first; tap_y <= tap_last; tap_y++) {
        const float weight = weights[tap_y - center_row + radius];
        madd_v4_v4fl(sum, &rect[(tap_y * x + col) * 4], weight);
        weight_sum += weight;
      }

      /* Normalize by the weights of the taps that contributed. */
      mul_v4_v4fl(&out[(dst_row + col) * 4], sum, 1.0f / weight_sum);
    }
  }

  MEM_freeN(weights);
}
static void do_gaussian_blur_effect_x_cb(const SeqRenderData *context,
Sequence *seq,
ImBuf *ibuf,
int start_line,
int total_lines,
ImBuf *out)
{
if (out->float_buffer.data) {
float *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
slice_get_float_buffers(
context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_gaussian_blur_effect_float_x(seq,
start_line,
context->rectx,
total_lines,
context->rectx,
context->recty,
ibuf->float_buffer.data,
rect_out);
}
else {
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
slice_get_byte_buffers(
context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_gaussian_blur_effect_byte_x(seq,
start_line,
context->rectx,
total_lines,
context->rectx,
context->recty,
ibuf->byte_buffer.data,
rect_out);
}
}
static void do_gaussian_blur_effect_y_cb(const SeqRenderData *context,
Sequence *seq,
ImBuf *ibuf,
int start_line,
int total_lines,
ImBuf *out)
{
if (out->float_buffer.data) {
float *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
slice_get_float_buffers(
context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_gaussian_blur_effect_float_y(seq,
start_line,
context->rectx,
total_lines,
context->rectx,
context->recty,
ibuf->float_buffer.data,
rect_out);
}
else {
uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr;
slice_get_byte_buffers(
context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out);
do_gaussian_blur_effect_byte_y(seq,
start_line,
context->rectx,
total_lines,
context->rectx,
context->recty,
ibuf->byte_buffer.data,
rect_out);
}
}
/**
 * Inputs shared by all jobs of one threaded gaussian blur pass. The fields are copied into every
 * per-job #RenderGaussianBlurEffectThread by #render_effect_execute_init_handle.
 */
struct RenderGaussianBlurEffectInitData {
  /* Render context, forwarded unchanged to the blur callbacks. */
  const SeqRenderData *context;
  /* Gaussian blur strip, forwarded unchanged to the blur callbacks. */
  Sequence *seq;
  /* Image the pass reads from. */
  ImBuf *ibuf;
  /* Image the pass writes to. */
  ImBuf *out;
};
/**
 * Per-job data of a threaded gaussian blur pass: the shared inputs plus the band of rows this
 * job is responsible for. Filled in by #render_effect_execute_init_handle.
 */
struct RenderGaussianBlurEffectThread {
  /* Render context, forwarded unchanged to the blur callbacks. */
  const SeqRenderData *context;
  /* Gaussian blur strip, forwarded unchanged to the blur callbacks. */
  Sequence *seq;
  /* Image the pass reads from. */
  ImBuf *ibuf;
  /* Image the pass writes to. */
  ImBuf *out;
  /* First row of the band handled by this job. */
  int start_line;
  /* Number of rows in the band handled by this job. */
  int tot_line;
};
static void render_effect_execute_init_handle(void *handle_v,
int start_line,
int tot_line,
void *init_data_v)
{
RenderGaussianBlurEffectThread *handle = (RenderGaussianBlurEffectThread *)handle_v;
RenderGaussianBlurEffectInitData *init_data = (RenderGaussianBlurEffectInitData *)init_data_v;
handle->context = init_data->context;
handle->seq = init_data->seq;
handle->ibuf = init_data->ibuf;
handle->out = init_data->out;
handle->start_line = start_line;
handle->tot_line = tot_line;
}
static void *render_effect_execute_do_x_thread(void *thread_data_v)
{
RenderGaussianBlurEffectThread *thread_data = (RenderGaussianBlurEffectThread *)thread_data_v;
do_gaussian_blur_effect_x_cb(thread_data->context,
thread_data->seq,
thread_data->ibuf,
thread_data->start_line,
thread_data->tot_line,
thread_data->out);
return nullptr;
}
static void *render_effect_execute_do_y_thread(void *thread_data_v)
{
RenderGaussianBlurEffectThread *thread_data = (RenderGaussianBlurEffectThread *)thread_data_v;
do_gaussian_blur_effect_y_cb(thread_data->context,
thread_data->seq,
thread_data->ibuf,
thread_data->start_line,
thread_data->tot_line,
thread_data->out);
return nullptr;
}
static ImBuf *do_gaussian_blur_effect(const SeqRenderData *context,
@@ -2935,32 +2470,75 @@ static ImBuf *do_gaussian_blur_effect(const SeqRenderData *context,
ImBuf * /*ibuf2*/,
ImBuf * /*ibuf3*/)
{
using namespace blender;
/* Create blur kernel weights. */
const GaussianBlurVars *data = static_cast<const GaussianBlurVars *>(seq->effectdata);
const int half_size_x = int(data->size_x + 0.5f);
const int half_size_y = int(data->size_y + 0.5f);
Array<float> gausstab_x = make_gaussian_blur_kernel(data->size_x, half_size_x);
Array<float> gausstab_y = make_gaussian_blur_kernel(data->size_y, half_size_y);
const int width = context->rectx;
const int height = context->recty;
const bool is_float = ibuf1->float_buffer.data;
/* Horizontal blur: create output, blur ibuf1 into it. */
ImBuf *out = prepare_effect_imbufs(context, ibuf1, nullptr, nullptr);
threading::parallel_for(IndexRange(context->recty), 32, [&](const IndexRange y_range) {
const int y_first = y_range.first();
const int y_size = y_range.size();
if (is_float) {
gaussian_blur_x(gausstab_x,
half_size_x,
y_first,
width,
y_size,
height,
ibuf1->float_buffer.data,
out->float_buffer.data);
}
else {
gaussian_blur_x(gausstab_x,
half_size_x,
y_first,
width,
y_size,
height,
ibuf1->byte_buffer.data,
out->byte_buffer.data);
}
});
RenderGaussianBlurEffectInitData init_data;
init_data.context = context;
init_data.seq = seq;
init_data.ibuf = ibuf1;
init_data.out = out;
IMB_processor_apply_threaded(out->y,
sizeof(RenderGaussianBlurEffectThread),
&init_data,
render_effect_execute_init_handle,
render_effect_execute_do_x_thread);
/* Vertical blur: create output, blur previous output into it. */
ibuf1 = out;
init_data.ibuf = ibuf1;
out = prepare_effect_imbufs(context, ibuf1, nullptr, nullptr);
init_data.out = out;
IMB_processor_apply_threaded(out->y,
sizeof(RenderGaussianBlurEffectThread),
&init_data,
render_effect_execute_init_handle,
render_effect_execute_do_y_thread);
threading::parallel_for(IndexRange(context->recty), 32, [&](const IndexRange y_range) {
const int y_first = y_range.first();
const int y_size = y_range.size();
if (is_float) {
gaussian_blur_y(gausstab_y,
half_size_y,
y_first,
width,
y_size,
height,
ibuf1->float_buffer.data,
out->float_buffer.data);
}
else {
gaussian_blur_y(gausstab_y,
half_size_y,
y_first,
width,
y_size,
height,
ibuf1->byte_buffer.data,
out->byte_buffer.data);
}
});
/* Free the first output. */
IMB_freeImBuf(ibuf1);
return out;