VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
/* SPDX-FileCopyrightText: 2012-2024 Blender Authors
|
2023-05-31 16:19:06 +02:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2019-02-18 08:08:12 +11:00
|
|
|
/** \file
|
|
|
|
|
* \ingroup bke
|
2012-08-19 15:41:56 +00:00
|
|
|
*/
|
|
|
|
|
|
2025-01-26 20:08:04 +01:00
|
|
|
#include <algorithm>
|
2023-07-22 11:27:25 +10:00
|
|
|
#include <cstddef>
|
|
|
|
|
#include <cstring>
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2024-09-19 18:14:49 +02:00
|
|
|
#include "BLI_array.hh"
|
2025-02-11 16:59:42 +01:00
|
|
|
#include "BLI_listbase.h"
|
2024-09-11 12:42:03 +02:00
|
|
|
#include "BLI_math_geom.h"
|
2024-09-19 18:14:49 +02:00
|
|
|
#include "BLI_math_vector.hh"
|
2012-08-19 15:41:56 +00:00
|
|
|
#include "BLI_string.h"
|
2023-10-18 17:15:30 +02:00
|
|
|
#include "BLI_string_utils.hh"
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
#include "BLI_task.hh"
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2024-02-09 18:59:42 +01:00
|
|
|
#include "BLT_translation.hh"
|
2013-03-25 08:29:06 +00:00
|
|
|
|
2019-08-15 20:23:13 +02:00
|
|
|
#include "DNA_mask_types.h"
|
2020-03-19 09:33:03 +01:00
|
|
|
#include "DNA_sequence_types.h"
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2023-12-21 10:10:53 +01:00
|
|
|
#include "BKE_colortools.hh"
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2024-01-18 22:50:23 +02:00
|
|
|
#include "IMB_colormanagement.hh"
|
|
|
|
|
#include "IMB_imbuf.hh"
|
|
|
|
|
#include "IMB_imbuf_types.hh"
|
2020-11-01 21:03:31 +01:00
|
|
|
|
2023-11-02 01:05:06 +01:00
|
|
|
#include "SEQ_modifier.hh"
|
|
|
|
|
#include "SEQ_render.hh"
|
|
|
|
|
#include "SEQ_sound.hh"
|
2024-05-07 19:37:24 +02:00
|
|
|
#include "SEQ_time.hh"
|
|
|
|
|
#include "SEQ_utils.hh"
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2023-08-28 15:01:05 +02:00
|
|
|
#include "BLO_read_write.hh"
|
2020-11-06 14:27:51 +01:00
|
|
|
|
2023-11-02 01:05:06 +01:00
|
|
|
#include "render.hh"
|
2020-10-26 00:47:06 +01:00
|
|
|
|
2025-03-06 06:22:14 +01:00
|
|
|
namespace blender::seq {
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
|
2020-06-16 12:32:42 +10:00
|
|
|
/* -------------------------------------------------------------------- */
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
/* Read one 4-channel byte pixel at `ptr` and return it as a float4, converted with
 * straight_uchar_to_premul_float(). */
static float4 load_pixel_premul(const uchar *ptr)
{
  float4 pixel;
  straight_uchar_to_premul_float(pixel, ptr);
  return pixel;
}
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
/* Float overload: the four floats at `ptr` are copied into a float4 without any conversion. */
static float4 load_pixel_premul(const float *ptr)
{
  const float4 pixel(ptr);
  return pixel;
}
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
/* Write `pix` into the 4-channel byte pixel at `ptr`, converted with
 * premul_float_to_straight_uchar(). */
static void store_pixel_premul(float4 pix, uchar *ptr)
{
  uchar *dst = ptr;
  premul_float_to_straight_uchar(dst, pix);
}
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
/* Float overload: `pix` is written as-is over the four floats starting at `ptr`. */
static void store_pixel_premul(float4 pix, float *ptr)
{
  float4 *dst = reinterpret_cast<float4 *>(ptr);
  *dst = pix;
}
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
/* Read one 4-channel byte pixel at `ptr` and return it as a float4, converted with
 * rgba_uchar_to_float(). */
static float4 load_pixel_raw(const uchar *ptr)
{
  float4 pixel;
  rgba_uchar_to_float(pixel, ptr);
  return pixel;
}
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
/* Float overload: the four floats at `ptr` are copied into a float4 without any conversion. */
static float4 load_pixel_raw(const float *ptr)
{
  const float4 pixel(ptr);
  return pixel;
}
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
/* Write `pix` into the 4-channel byte pixel at `ptr`, converted with rgba_float_to_uchar(). */
static void store_pixel_raw(float4 pix, uchar *ptr)
{
  uchar *dst = ptr;
  rgba_float_to_uchar(dst, pix);
}
|
|
|
|
|
|
|
|
|
|
/* Float overload: `pix` is written as-is over the four floats starting at `ptr`. */
static void store_pixel_raw(float4 pix, float *ptr)
{
  float4 *dst = reinterpret_cast<float4 *>(ptr);
  *dst = pix;
}
|
|
|
|
|
|
|
|
|
|
/* Byte mask */
|
|
|
|
|
static void apply_and_advance_mask(float4 input, float4 &result, const uchar *&mask)
|
|
|
|
|
{
|
|
|
|
|
float3 m;
|
|
|
|
|
rgb_uchar_to_float(m, mask);
|
|
|
|
|
result.x = math::interpolate(input.x, result.x, m.x);
|
|
|
|
|
result.y = math::interpolate(input.y, result.y, m.y);
|
|
|
|
|
result.z = math::interpolate(input.z, result.z, m.z);
|
|
|
|
|
mask += 4;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Float mask */
|
|
|
|
|
static void apply_and_advance_mask(float4 input, float4 &result, const float *&mask)
|
|
|
|
|
{
|
|
|
|
|
float3 m(mask);
|
|
|
|
|
result.x = math::interpolate(input.x, result.x, m.x);
|
|
|
|
|
result.y = math::interpolate(input.y, result.y, m.y);
|
|
|
|
|
result.z = math::interpolate(input.z, result.z, m.z);
|
|
|
|
|
mask += 4;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* No mask */
|
|
|
|
|
static void apply_and_advance_mask(float4 /*input*/, float4 & /*result*/, const void *& /*mask*/)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Given `T` that implements an `apply` function:
|
|
|
|
|
*
|
|
|
|
|
* template <typename ImageT, typename MaskT>
|
|
|
|
|
* void apply(ImageT* image, const MaskT* mask, IndexRange size);
|
|
|
|
|
*
|
|
|
|
|
* this function calls the apply() function in parallel
|
|
|
|
|
* chunks of the image to process, and with needed
|
|
|
|
|
* uchar, float or void types (void is used for mask, when there is
|
|
|
|
|
* no masking). Both input and mask images are expected to have
|
|
|
|
|
* 4 (RGBA) color channels. Input is modified. */
|
|
|
|
|
template<typename T> static void apply_modifier_op(T &op, ImBuf *ibuf, const ImBuf *mask)
|
|
|
|
|
{
|
|
|
|
|
if (ibuf == nullptr) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
2025-01-02 14:33:52 +01:00
|
|
|
BLI_assert_msg(ibuf->channels == 0 || ibuf->channels == 4,
|
|
|
|
|
"Sequencer only supports 4 channel images");
|
|
|
|
|
BLI_assert_msg(mask == nullptr || mask->channels == 0 || mask->channels == 4,
|
|
|
|
|
"Sequencer only supports 4 channel images");
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
|
|
|
|
|
threading::parallel_for(IndexRange(size_t(ibuf->x) * ibuf->y), 32 * 1024, [&](IndexRange range) {
|
|
|
|
|
uchar *image_byte = ibuf->byte_buffer.data;
|
|
|
|
|
float *image_float = ibuf->float_buffer.data;
|
|
|
|
|
const uchar *mask_byte = mask ? mask->byte_buffer.data : nullptr;
|
|
|
|
|
const float *mask_float = mask ? mask->float_buffer.data : nullptr;
|
|
|
|
|
const void *mask_none = nullptr;
|
|
|
|
|
int64_t offset = range.first() * 4;
|
|
|
|
|
|
|
|
|
|
/* Instantiate the needed processing function based on image/mask
|
|
|
|
|
* data types. */
|
|
|
|
|
if (image_byte) {
|
|
|
|
|
if (mask_byte) {
|
|
|
|
|
op.apply(image_byte + offset, mask_byte + offset, range);
|
|
|
|
|
}
|
|
|
|
|
else if (mask_float) {
|
|
|
|
|
op.apply(image_byte + offset, mask_float + offset, range);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
op.apply(image_byte + offset, mask_none, range);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (image_float) {
|
|
|
|
|
if (mask_byte) {
|
|
|
|
|
op.apply(image_float + offset, mask_byte + offset, range);
|
|
|
|
|
}
|
|
|
|
|
else if (mask_float) {
|
|
|
|
|
op.apply(image_float + offset, mask_float + offset, range);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
op.apply(image_float + offset, mask_none, range);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2020-11-05 13:33:27 +01:00
|
|
|
/**
|
2020-11-06 14:10:59 +01:00
|
|
|
* \a timeline_frame is offset by \a fra_offset only in case we are using a real mask.
|
2020-11-05 13:33:27 +01:00
|
|
|
*/
|
2025-03-06 13:04:39 +01:00
|
|
|
static ImBuf *modifier_render_mask_input(const RenderData *context,
|
2020-11-05 13:33:27 +01:00
|
|
|
int mask_input_type,
|
2025-05-01 00:22:04 +02:00
|
|
|
Strip *mask_strip,
|
2020-11-05 13:33:27 +01:00
|
|
|
Mask *mask_id,
|
2020-11-06 14:10:59 +01:00
|
|
|
int timeline_frame,
|
2024-12-06 18:43:52 +01:00
|
|
|
int fra_offset)
|
2020-11-05 13:33:27 +01:00
|
|
|
{
|
2023-07-20 09:46:24 +02:00
|
|
|
ImBuf *mask_input = nullptr;
|
2020-11-05 13:33:27 +01:00
|
|
|
|
|
|
|
|
if (mask_input_type == SEQUENCE_MASK_INPUT_STRIP) {
|
2025-05-01 00:22:04 +02:00
|
|
|
if (mask_strip) {
|
2020-11-05 13:33:27 +01:00
|
|
|
SeqRenderState state;
|
2025-05-01 00:22:04 +02:00
|
|
|
mask_input = seq_render_strip(context, &state, mask_strip, timeline_frame);
|
2020-11-05 13:33:27 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (mask_input_type == SEQUENCE_MASK_INPUT_ID) {
|
2024-12-06 18:43:52 +01:00
|
|
|
/* Note that we do not request mask to be float image: if it is that is
|
|
|
|
|
* fine, but if it is a byte image then we also just take that without
|
|
|
|
|
* extra memory allocations or conversions. All modifiers are expected
|
|
|
|
|
* to handle mask being either type. */
|
|
|
|
|
mask_input = seq_render_mask(context, mask_id, timeline_frame - fra_offset, false);
|
2020-11-05 13:33:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return mask_input;
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/* Render the mask of modifier `smd`: forwards the modifier's mask input type, mask strip
 * and mask ID, together with the frame arguments, to modifier_render_mask_input(). */
static ImBuf *modifier_mask_get(StripModifierData *smd,
                                const RenderData *context,
                                int timeline_frame,
                                int fra_offset)
{
  ImBuf *mask_image = modifier_render_mask_input(
      context, smd->mask_input_type, smd->mask_strip, smd->mask_id, timeline_frame, fra_offset);
  return mask_image;
}
|
|
|
|
|
|
2020-06-16 12:32:42 +10:00
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name Color Balance Modifier
|
|
|
|
|
* \{ */
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
/* Lift-Gamma-Gain math. NOTE: lift is actually (2-lift).
 *
 * Computes `pow(max(((in - 1) * lift + 1) * gain, 0), gamma) * mul`, with the result
 * clamped to the [FLT_MIN, FLT_MAX] range. Because the lower bound is FLT_MIN and not
 * zero, the returned value is never zero or negative, and infinities become FLT_MAX. */
static float color_balance_lgg(
    float in, const float lift, const float gain, const float gamma, const float mul)
{
  const float lifted = ((in - 1.0f) * lift) + 1.0f;

  /* Prevent NaN: powf() of a negative base with a fractional exponent is not a number. */
  const float base = std::max(lifted * gain, 0.0f);

  const float graded = powf(base, gamma) * mul;
  return std::clamp(graded, FLT_MIN, FLT_MAX);
}
|
|
|
|
|
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
/* Slope-Offset-Power (ASC CDL) math, see https://en.wikipedia.org/wiki/ASC_CDL
 *
 * Computes `pow(max(in * slope + offset, 0), power) * mul`, with the result clamped to
 * the [FLT_MIN, FLT_MAX] range. Because the lower bound is FLT_MIN and not zero, the
 * returned value is never zero or negative, and infinities become FLT_MAX. */
static float color_balance_sop(
    float in, const float slope, const float offset, const float power, float mul)
{
  /* Prevent NaN: powf() of a negative base with a fractional exponent is not a number. */
  const float base = std::max(in * slope + offset, 0.0f);

  const float graded = powf(base, power) * mul;
  return std::clamp(graded, FLT_MIN, FLT_MAX);
}
|
|
|
|
|
|
2024-09-13 10:56:26 +10:00
|
|
|
/**
 * Number of entries in the color balance lookup tables. This is deliberately larger
 * than the 256 possible byte values: because of alpha pre-multiplication, dark values
 * with low alphas might need more precision.
 */
static constexpr int CB_TABLE_SIZE = 1024;
|
2021-09-30 21:09:47 +02:00
|
|
|
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
static void make_cb_table_lgg(
|
|
|
|
|
float lift, float gain, float gamma, float mul, float r_table[CB_TABLE_SIZE])
|
2021-09-30 21:09:47 +02:00
|
|
|
{
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
for (int i = 0; i < CB_TABLE_SIZE; i++) {
|
|
|
|
|
float x = float(i) * (1.0f / (CB_TABLE_SIZE - 1.0f));
|
|
|
|
|
r_table[i] = color_balance_lgg(x, lift, gain, gamma, mul);
|
2020-11-05 13:33:27 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
static void make_cb_table_sop(
|
|
|
|
|
float slope, float offset, float power, float mul, float r_table[CB_TABLE_SIZE])
|
2020-11-05 13:33:27 +01:00
|
|
|
{
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
for (int i = 0; i < CB_TABLE_SIZE; i++) {
|
|
|
|
|
float x = float(i) * (1.0f / (CB_TABLE_SIZE - 1.0f));
|
|
|
|
|
r_table[i] = color_balance_sop(x, slope, offset, power, mul);
|
2020-11-05 13:33:27 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
struct ColorBalanceApplyOp {
|
|
|
|
|
int method;
|
|
|
|
|
float3 lift, gain, gamma;
|
|
|
|
|
float3 slope, offset, power;
|
|
|
|
|
float multiplier;
|
|
|
|
|
float lut[3][CB_TABLE_SIZE];
|
|
|
|
|
|
|
|
|
|
/* Apply on a byte image via a table lookup. */
|
|
|
|
|
template<typename MaskT> void apply(uchar *image, const MaskT *mask, IndexRange size)
|
|
|
|
|
{
|
|
|
|
|
for ([[maybe_unused]] int64_t i : size) {
|
|
|
|
|
float4 input = load_pixel_premul(image);
|
|
|
|
|
|
|
|
|
|
float4 result;
|
|
|
|
|
int p0 = int(input.x * (CB_TABLE_SIZE - 1.0f) + 0.5f);
|
|
|
|
|
int p1 = int(input.y * (CB_TABLE_SIZE - 1.0f) + 0.5f);
|
|
|
|
|
int p2 = int(input.z * (CB_TABLE_SIZE - 1.0f) + 0.5f);
|
|
|
|
|
result.x = this->lut[0][p0];
|
|
|
|
|
result.y = this->lut[1][p1];
|
|
|
|
|
result.z = this->lut[2][p2];
|
|
|
|
|
result.w = input.w;
|
|
|
|
|
|
|
|
|
|
apply_and_advance_mask(input, result, mask);
|
|
|
|
|
store_pixel_premul(result, image);
|
|
|
|
|
image += 4;
|
2021-09-30 21:09:47 +02:00
|
|
|
}
|
2020-11-05 13:33:27 +01:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
|
|
|
|
|
/* Apply on a float image by doing full math. */
|
|
|
|
|
template<typename MaskT> void apply(float *image, const MaskT *mask, IndexRange size)
|
|
|
|
|
{
|
|
|
|
|
if (this->method == SEQ_COLOR_BALANCE_METHOD_LIFTGAMMAGAIN) {
|
|
|
|
|
/* Lift/Gamma/Gain */
|
|
|
|
|
for ([[maybe_unused]] int64_t i : size) {
|
|
|
|
|
float4 input = load_pixel_premul(image);
|
|
|
|
|
|
|
|
|
|
float4 result;
|
|
|
|
|
result.x = color_balance_lgg(
|
|
|
|
|
input.x, this->lift.x, this->gain.x, this->gamma.x, this->multiplier);
|
|
|
|
|
result.y = color_balance_lgg(
|
|
|
|
|
input.y, this->lift.y, this->gain.y, this->gamma.y, this->multiplier);
|
|
|
|
|
result.z = color_balance_lgg(
|
|
|
|
|
input.z, this->lift.z, this->gain.z, this->gamma.z, this->multiplier);
|
|
|
|
|
result.w = input.w;
|
|
|
|
|
|
|
|
|
|
apply_and_advance_mask(input, result, mask);
|
|
|
|
|
store_pixel_premul(result, image);
|
|
|
|
|
image += 4;
|
|
|
|
|
}
|
2024-12-06 18:43:52 +01:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
else if (this->method == SEQ_COLOR_BALANCE_METHOD_SLOPEOFFSETPOWER) {
|
|
|
|
|
/* Slope/Offset/Power */
|
|
|
|
|
for ([[maybe_unused]] int64_t i : size) {
|
|
|
|
|
float4 input = load_pixel_premul(image);
|
|
|
|
|
|
|
|
|
|
float4 result;
|
|
|
|
|
result.x = color_balance_sop(
|
|
|
|
|
input.x, this->slope.x, this->offset.x, this->power.x, this->multiplier);
|
|
|
|
|
result.y = color_balance_sop(
|
|
|
|
|
input.y, this->slope.y, this->offset.y, this->power.y, this->multiplier);
|
|
|
|
|
result.z = color_balance_sop(
|
|
|
|
|
input.z, this->slope.z, this->offset.z, this->power.z, this->multiplier);
|
|
|
|
|
result.w = input.w;
|
|
|
|
|
|
|
|
|
|
apply_and_advance_mask(input, result, mask);
|
|
|
|
|
store_pixel_premul(result, image);
|
|
|
|
|
image += 4;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
BLI_assert_unreachable();
|
2020-11-05 13:33:27 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
void init_lgg(const StripColorBalance &data)
|
|
|
|
|
{
|
|
|
|
|
BLI_assert(data.method == SEQ_COLOR_BALANCE_METHOD_LIFTGAMMAGAIN);
|
|
|
|
|
|
|
|
|
|
this->lift = 2.0f - float3(data.lift);
|
|
|
|
|
if (data.flag & SEQ_COLOR_BALANCE_INVERSE_LIFT) {
|
|
|
|
|
for (int c = 0; c < 3; c++) {
|
|
|
|
|
/* tweak to give more subtle results
|
|
|
|
|
* values above 1.0 are scaled */
|
|
|
|
|
if (this->lift[c] > 1.0f) {
|
|
|
|
|
this->lift[c] = powf(this->lift[c] - 1.0f, 2.0f) + 1.0f;
|
|
|
|
|
}
|
|
|
|
|
this->lift[c] = 2.0f - this->lift[c];
|
2024-12-06 18:43:52 +01:00
|
|
|
}
|
2020-11-05 13:33:27 +01:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
|
|
|
|
|
this->gain = float3(data.gain);
|
|
|
|
|
if (data.flag & SEQ_COLOR_BALANCE_INVERSE_GAIN) {
|
|
|
|
|
this->gain = math::rcp(math::max(this->gain, float3(1.0e-6f)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this->gamma = float3(data.gamma);
|
|
|
|
|
if (!(data.flag & SEQ_COLOR_BALANCE_INVERSE_GAMMA)) {
|
|
|
|
|
this->gamma = math::rcp(math::max(this->gamma, float3(1.0e-6f)));
|
|
|
|
|
}
|
2020-11-05 13:33:27 +01:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
|
|
|
|
|
void init_sop(const StripColorBalance &data)
|
|
|
|
|
{
|
|
|
|
|
BLI_assert(data.method == SEQ_COLOR_BALANCE_METHOD_SLOPEOFFSETPOWER);
|
|
|
|
|
|
|
|
|
|
this->slope = float3(data.slope);
|
|
|
|
|
if (data.flag & SEQ_COLOR_BALANCE_INVERSE_SLOPE) {
|
|
|
|
|
this->slope = math::rcp(math::max(this->slope, float3(1.0e-6f)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this->offset = float3(data.offset) - 1.0f;
|
|
|
|
|
if (data.flag & SEQ_COLOR_BALANCE_INVERSE_OFFSET) {
|
|
|
|
|
this->offset = -this->offset;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this->power = float3(data.power);
|
|
|
|
|
if (!(data.flag & SEQ_COLOR_BALANCE_INVERSE_POWER)) {
|
|
|
|
|
this->power = math::rcp(math::max(this->power, float3(1.0e-6f)));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void init(const ColorBalanceModifierData &data, bool byte_image)
|
|
|
|
|
{
|
|
|
|
|
this->multiplier = data.color_multiply;
|
|
|
|
|
this->method = data.color_balance.method;
|
|
|
|
|
|
|
|
|
|
if (this->method == SEQ_COLOR_BALANCE_METHOD_LIFTGAMMAGAIN) {
|
|
|
|
|
init_lgg(data.color_balance);
|
|
|
|
|
if (byte_image) {
|
|
|
|
|
for (int c = 0; c < 3; c++) {
|
|
|
|
|
make_cb_table_lgg(
|
|
|
|
|
this->lift[c], this->gain[c], this->gamma[c], this->multiplier, this->lut[c]);
|
|
|
|
|
}
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
}
|
|
|
|
|
else if (this->method == SEQ_COLOR_BALANCE_METHOD_SLOPEOFFSETPOWER) {
|
|
|
|
|
init_sop(data.color_balance);
|
|
|
|
|
if (byte_image) {
|
|
|
|
|
for (int c = 0; c < 3; c++) {
|
|
|
|
|
make_cb_table_sop(
|
|
|
|
|
this->slope[c], this->offset[c], this->power[c], this->multiplier, this->lut[c]);
|
|
|
|
|
}
|
2024-12-09 03:33:05 +01:00
|
|
|
}
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
else {
|
|
|
|
|
BLI_assert_unreachable();
|
|
|
|
|
}
|
2020-11-05 13:33:27 +01:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
};
|
2020-11-05 13:33:27 +01:00
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/**
 * Reset a Color Balance modifier to its defaults: a color multiplier of 1.0, method 0,
 * and 1.0 in every channel of all Lift/Gamma/Gain and Slope/Offset/Power values.
 */
static void colorBalance_init_data(StripModifierData *smd)
{
  ColorBalanceModifierData *modifier = (ColorBalanceModifierData *)smd;
  auto &balance = modifier->color_balance;

  modifier->color_multiply = 1.0f;
  /* NOTE(review): presumably the Lift/Gamma/Gain method; confirm against the
   * `SEQ_COLOR_BALANCE_METHOD_*` definitions. */
  balance.method = 0;

  for (int channel = 0; channel < 3; channel++) {
    /* Lift/Gamma/Gain. */
    balance.lift[channel] = 1.0f;
    balance.gamma[channel] = 1.0f;
    balance.gain[channel] = 1.0f;
    /* Slope/Offset/Power. */
    balance.slope[channel] = 1.0f;
    balance.offset[channel] = 1.0f;
    balance.power[channel] = 1.0f;
  }
}
|
|
|
|
|
|
2024-09-11 12:42:03 +02:00
|
|
|
static void colorBalance_apply(const StripScreenQuad & /*quad*/,
|
2025-05-01 00:22:04 +02:00
|
|
|
StripModifierData *smd,
|
2024-09-11 12:42:03 +02:00
|
|
|
ImBuf *ibuf,
|
|
|
|
|
ImBuf *mask)
|
2020-11-05 13:33:27 +01:00
|
|
|
{
|
VSE: Optimize the Color Balance modifier
Speedup the Color Balance VSE strip modifier, with two things:
- Generally, use a much lower overhead parallel_for, also with
lower grain size (32 image rows, instead of 64 that were used
before). This is what makes the "float" variant faster.
- For "byte" variant, create a precalculated lookup table instead
of doing all the math per-pixel. This was *almost* done in
existing code, except it was put into the code path that was
never-ever used. However, since this is all done on premultiplied
values, I'm using lookup table size of 1024 instead of 256, so
that semitransparent pixels get some more precision for
"in-between values". This LUT is what results in the main speedup
of "byte" variant.
Calculating Color Balance at 4K resolution, times in milliseconds:
- PC (Ryzen 5950X), PNG (byte): 22.2 -> 2.9 ms, EXR (float): 20.1 -> 15.2 ms
- Mac (M1 Max), PNG (byte): 28.9 -> 7.5 ms, EXR (float): 21.8 -> 8.5 ms
More timing details in PR.
Pull Request: https://projects.blender.org/blender/blender/pulls/127121
2024-09-05 19:21:35 +02:00
|
|
|
const ColorBalanceModifierData *cbmd = (const ColorBalanceModifierData *)smd;
|
2020-11-05 13:33:27 +01:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
ColorBalanceApplyOp op;
|
|
|
|
|
op.init(*cbmd, ibuf->byte_buffer.data != nullptr);
|
|
|
|
|
apply_modifier_op(op, ibuf, mask);
|
2012-08-19 15:41:56 +00:00
|
|
|
}
|
|
|
|
|
|
2020-06-16 12:32:42 +10:00
|
|
|
/** \} */
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name White Balance Modifier
|
|
|
|
|
* \{ */
|
2015-12-28 11:55:14 +01:00
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/** Reset a White Balance modifier to its default: a white value of 1.0 in all three channels. */
static void whiteBalance_init_data(StripModifierData *smd)
{
  WhiteBalanceModifierData *wbmd = (WhiteBalanceModifierData *)smd;
  const float default_white = 1.0f;
  copy_v3_fl(wbmd->white_value, default_white);
}
|
|
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
struct WhiteBalanceApplyOp {
|
2015-12-28 11:55:14 +01:00
|
|
|
float multiplier[3];
|
|
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
template<typename ImageT, typename MaskT>
|
|
|
|
|
void apply(ImageT *image, const MaskT *mask, IndexRange size)
|
|
|
|
|
{
|
|
|
|
|
for ([[maybe_unused]] int64_t i : size) {
|
|
|
|
|
float4 input = load_pixel_premul(image);
|
2015-12-28 11:55:14 +01:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
float4 result;
|
|
|
|
|
result.w = input.w;
|
2018-10-19 09:07:40 +11:00
|
|
|
#if 0
|
|
|
|
|
mul_v3_v3(result, multiplier);
|
|
|
|
|
#else
|
2016-02-23 14:50:30 +11:00
|
|
|
/* similar to division without the clipping */
|
|
|
|
|
for (int i = 0; i < 3; i++) {
|
2024-12-10 10:20:18 +02:00
|
|
|
/* Prevent pow argument from being negative. This whole math
|
|
|
|
|
* breaks down overall with any HDR colors; would be good to
|
|
|
|
|
* revisit and do something more proper. */
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
float f = max_ff(1.0f - input[i], 0.0f);
|
|
|
|
|
result[i] = 1.0f - powf(f, this->multiplier[i]);
|
2016-02-23 14:50:30 +11:00
|
|
|
}
|
2018-10-19 09:07:40 +11:00
|
|
|
#endif
|
2015-12-28 11:55:14 +01:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
apply_and_advance_mask(input, result, mask);
|
|
|
|
|
store_pixel_premul(result, image);
|
|
|
|
|
image += 4;
|
2015-12-29 01:08:13 +11:00
|
|
|
}
|
2015-12-28 11:55:14 +01:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
};
|
2015-12-28 11:55:14 +01:00
|
|
|
|
2024-09-11 12:42:03 +02:00
|
|
|
static void whiteBalance_apply(const StripScreenQuad & /*quad*/,
|
2025-05-01 00:22:04 +02:00
|
|
|
StripModifierData *smd,
|
2024-09-11 12:42:03 +02:00
|
|
|
ImBuf *ibuf,
|
|
|
|
|
ImBuf *mask)
|
2015-12-28 11:55:14 +01:00
|
|
|
{
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
const WhiteBalanceModifierData *data = (const WhiteBalanceModifierData *)smd;
|
2015-12-28 11:55:14 +01:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
WhiteBalanceApplyOp op;
|
|
|
|
|
op.multiplier[0] = (data->white_value[0] != 0.0f) ? 1.0f / data->white_value[0] : FLT_MAX;
|
|
|
|
|
op.multiplier[1] = (data->white_value[1] != 0.0f) ? 1.0f / data->white_value[1] : FLT_MAX;
|
|
|
|
|
op.multiplier[2] = (data->white_value[2] != 0.0f) ? 1.0f / data->white_value[2] : FLT_MAX;
|
|
|
|
|
apply_modifier_op(op, ibuf, mask);
|
2015-12-28 11:55:14 +01:00
|
|
|
}
|
|
|
|
|
|
2020-06-16 12:32:42 +10:00
|
|
|
/** \} */
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name Curves Modifier
|
|
|
|
|
* \{ */
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/* Init callback of the Curves modifier: reset its curve mapping to defaults.
 * NOTE(review): the arguments presumably request 4 curves over the
 * (0, 0)..(1, 1) range with automatic handles; confirm against
 * #BKE_curvemapping_set_defaults. */
static void curves_init_data(StripModifierData *smd)
{
  CurvesModifierData *curves_smd = (CurvesModifierData *)smd;
  auto *mapping = &curves_smd->curve_mapping;

  BKE_curvemapping_set_defaults(mapping, 4, 0.0f, 0.0f, 1.0f, 1.0f, HD_AUTO);
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/* Free callback of the Curves modifier: release the data held by its curve mapping. */
static void curves_free_data(StripModifierData *smd)
{
  CurvesModifierData *curves_smd = (CurvesModifierData *)smd;
  auto *mapping = &curves_smd->curve_mapping;

  BKE_curvemapping_free_data(mapping);
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/* Copy callback of the Curves modifier: copy the curve mapping of `smd` into `target`. */
static void curves_copy_data(StripModifierData *target, StripModifierData *smd)
{
  CurvesModifierData *src_curves = (CurvesModifierData *)smd;
  CurvesModifierData *dst_curves = (CurvesModifierData *)target;

  auto *dst_mapping = &dst_curves->curve_mapping;
  auto *src_mapping = &src_curves->curve_mapping;
  BKE_curvemapping_copy_data(dst_mapping, src_mapping);
}
|
|
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
struct CurvesApplyOp {
|
|
|
|
|
const CurveMapping *curve_mapping;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
template<typename ImageT, typename MaskT>
|
|
|
|
|
void apply(ImageT *image, const MaskT *mask, IndexRange size)
|
|
|
|
|
{
|
|
|
|
|
for ([[maybe_unused]] int64_t i : size) {
|
|
|
|
|
float4 input = load_pixel_premul(image);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
float4 result;
|
|
|
|
|
BKE_curvemapping_evaluate_premulRGBF(this->curve_mapping, result, input);
|
|
|
|
|
result.w = input.w;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
apply_and_advance_mask(input, result, mask);
|
|
|
|
|
store_pixel_premul(result, image);
|
|
|
|
|
image += 4;
|
2012-08-19 15:41:56 +00:00
|
|
|
}
|
|
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
};
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2024-09-11 12:42:03 +02:00
|
|
|
static void curves_apply(const StripScreenQuad & /*quad*/,
|
2025-05-01 00:22:04 +02:00
|
|
|
StripModifierData *smd,
|
2024-09-11 12:42:03 +02:00
|
|
|
ImBuf *ibuf,
|
|
|
|
|
ImBuf *mask)
|
2012-08-19 15:41:56 +00:00
|
|
|
{
|
|
|
|
|
CurvesModifierData *cmd = (CurvesModifierData *)smd;
|
|
|
|
|
|
2020-08-07 22:36:11 +10:00
|
|
|
const float black[3] = {0.0f, 0.0f, 0.0f};
|
|
|
|
|
const float white[3] = {1.0f, 1.0f, 1.0f};
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2020-08-01 13:02:21 +10:00
|
|
|
BKE_curvemapping_init(&cmd->curve_mapping);
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2023-07-22 11:36:59 +10:00
|
|
|
BKE_curvemapping_premultiply(&cmd->curve_mapping, false);
|
2019-08-07 03:21:55 +10:00
|
|
|
BKE_curvemapping_set_black_white(&cmd->curve_mapping, black, white);
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
CurvesApplyOp op;
|
|
|
|
|
op.curve_mapping = &cmd->curve_mapping;
|
|
|
|
|
apply_modifier_op(op, ibuf, mask);
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2023-07-22 11:36:59 +10:00
|
|
|
BKE_curvemapping_premultiply(&cmd->curve_mapping, true);
|
2012-08-19 15:41:56 +00:00
|
|
|
}
|
|
|
|
|
|
2020-06-16 12:32:42 +10:00
|
|
|
/** \} */
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name Hue Correct Modifier
|
|
|
|
|
* \{ */
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/* Init callback of the Hue Correct modifier: set up the curve mapping with
 * defaults, reset the first three curves to the #CURVE_PRESET_MID8 preset,
 * enable wrapping and select the curve that is shown initially. */
static void hue_correct_init_data(StripModifierData *smd)
{
  HueCorrectModifierData *hue_smd = (HueCorrectModifierData *)smd;
  auto &mapping = hue_smd->curve_mapping;

  BKE_curvemapping_set_defaults(&mapping, 1, 0.0f, 0.0f, 1.0f, 1.0f, HD_AUTO);
  mapping.preset = CURVE_PRESET_MID8;

  for (int curve_index = 0; curve_index < 3; curve_index++) {
    CurveMap *curve = &mapping.cm[curve_index];
    BKE_curvemap_reset(curve, &mapping.clipr, mapping.preset, CURVEMAP_SLOPE_POSITIVE);
  }

  /* Use wrapping for all hue correct modifiers. */
  mapping.flag |= CUMA_USE_WRAPPING;

  /* Default to showing Saturation. */
  mapping.cur = 1;
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/* Free callback of the Hue Correct modifier: release the data held by its curve mapping. */
static void hue_correct_free_data(StripModifierData *smd)
{
  HueCorrectModifierData *hue_smd = (HueCorrectModifierData *)smd;
  auto *mapping = &hue_smd->curve_mapping;

  BKE_curvemapping_free_data(mapping);
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/* Copy callback of the Hue Correct modifier: copy the curve mapping of `smd` into `target`. */
static void hue_correct_copy_data(StripModifierData *target, StripModifierData *smd)
{
  HueCorrectModifierData *src_hue = (HueCorrectModifierData *)smd;
  HueCorrectModifierData *dst_hue = (HueCorrectModifierData *)target;

  auto *dst_mapping = &dst_hue->curve_mapping;
  auto *src_mapping = &src_hue->curve_mapping;
  BKE_curvemapping_copy_data(dst_mapping, src_mapping);
}
|
|
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
struct HueCorrectApplyOp {
|
|
|
|
|
const CurveMapping *curve_mapping;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
template<typename ImageT, typename MaskT>
|
|
|
|
|
void apply(ImageT *image, const MaskT *mask, IndexRange size)
|
|
|
|
|
{
|
|
|
|
|
for ([[maybe_unused]] int64_t i : size) {
|
|
|
|
|
/* NOTE: arguably incorrect usage of "raw" values, should be un-premultiplied.
|
|
|
|
|
* Not changing behavior for now, but would be good to fix someday. */
|
|
|
|
|
float4 input = load_pixel_raw(image);
|
|
|
|
|
float4 result;
|
|
|
|
|
result.w = input.w;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
float3 hsv;
|
|
|
|
|
rgb_to_hsv(input.x, input.y, input.z, &hsv.x, &hsv.y, &hsv.z);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2012-08-19 15:41:56 +00:00
|
|
|
/* adjust hue, scaling returned default 0.5 up to 1 */
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
float f;
|
|
|
|
|
f = BKE_curvemapping_evaluateF(this->curve_mapping, 0, hsv.x);
|
|
|
|
|
hsv.x += f - 0.5f;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2012-08-19 15:41:56 +00:00
|
|
|
/* adjust saturation, scaling returned default 0.5 up to 1 */
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
f = BKE_curvemapping_evaluateF(this->curve_mapping, 1, hsv.x);
|
|
|
|
|
hsv.y *= (f * 2.0f);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2012-08-19 15:41:56 +00:00
|
|
|
/* adjust value, scaling returned default 0.5 up to 1 */
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
f = BKE_curvemapping_evaluateF(this->curve_mapping, 2, hsv.x);
|
|
|
|
|
hsv.z *= (f * 2.0f);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
hsv.x = hsv.x - floorf(hsv.x); /* mod 1.0 */
|
|
|
|
|
hsv.y = math::clamp(hsv.y, 0.0f, 1.0f);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2012-08-19 15:41:56 +00:00
|
|
|
/* convert back to rgb */
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
hsv_to_rgb(hsv.x, hsv.y, hsv.z, &result.x, &result.y, &result.z);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
apply_and_advance_mask(input, result, mask);
|
|
|
|
|
store_pixel_raw(result, image);
|
|
|
|
|
image += 4;
|
2012-08-19 15:41:56 +00:00
|
|
|
}
|
|
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
};
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2024-09-11 12:42:03 +02:00
|
|
|
static void hue_correct_apply(const StripScreenQuad & /*quad*/,
|
2025-05-01 00:22:04 +02:00
|
|
|
StripModifierData *smd,
|
2024-09-11 12:42:03 +02:00
|
|
|
ImBuf *ibuf,
|
|
|
|
|
ImBuf *mask)
|
2012-08-19 15:41:56 +00:00
|
|
|
{
|
|
|
|
|
HueCorrectModifierData *hcmd = (HueCorrectModifierData *)smd;
|
|
|
|
|
|
2020-08-01 13:02:21 +10:00
|
|
|
BKE_curvemapping_init(&hcmd->curve_mapping);
|
2012-08-19 15:41:56 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
HueCorrectApplyOp op;
|
|
|
|
|
op.curve_mapping = &hcmd->curve_mapping;
|
|
|
|
|
apply_modifier_op(op, ibuf, mask);
|
2012-08-19 15:41:56 +00:00
|
|
|
}
|
|
|
|
|
|
2020-06-16 12:32:42 +10:00
|
|
|
/** \} */
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
2023-04-11 21:32:29 +02:00
|
|
|
/** \name Brightness/Contrast Modifier
|
2020-06-16 12:32:42 +10:00
|
|
|
* \{ */
|
2012-08-24 09:07:04 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
struct BrightContrastApplyOp {
|
|
|
|
|
float mul;
|
|
|
|
|
float add;
|
|
|
|
|
|
|
|
|
|
template<typename ImageT, typename MaskT>
|
|
|
|
|
void apply(ImageT *image, const MaskT *mask, IndexRange size)
|
|
|
|
|
{
|
|
|
|
|
for ([[maybe_unused]] int64_t i : size) {
|
|
|
|
|
/* NOTE: arguably incorrect usage of "raw" values, should be un-premultiplied.
|
|
|
|
|
* Not changing behavior for now, but would be good to fix someday. */
|
|
|
|
|
float4 input = load_pixel_raw(image);
|
|
|
|
|
|
|
|
|
|
float4 result;
|
|
|
|
|
result = input * this->mul + this->add;
|
|
|
|
|
result.w = input.w;
|
|
|
|
|
|
|
|
|
|
apply_and_advance_mask(input, result, mask);
|
|
|
|
|
store_pixel_raw(result, image);
|
|
|
|
|
image += 4;
|
2012-08-24 09:07:04 +00:00
|
|
|
}
|
|
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
};
|
2012-08-24 09:07:04 +00:00
|
|
|
|
2024-09-11 12:42:03 +02:00
|
|
|
static void brightcontrast_apply(const StripScreenQuad & /*quad*/,
|
2025-05-01 00:22:04 +02:00
|
|
|
StripModifierData *smd,
|
2024-09-11 12:42:03 +02:00
|
|
|
ImBuf *ibuf,
|
|
|
|
|
ImBuf *mask)
|
2012-08-24 09:07:04 +00:00
|
|
|
{
|
2024-09-15 23:14:09 +10:00
|
|
|
const BrightContrastModifierData *bcmd = (BrightContrastModifierData *)smd;
|
2012-08-24 09:07:04 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
BrightContrastApplyOp op;
|
|
|
|
|
|
|
|
|
|
/* The algorithm is by Werner D. Streidt
|
|
|
|
|
* (http://visca.com/ffactory/archives/5-99/msg00021.html)
|
|
|
|
|
* Extracted from OpenCV `demhist.cpp`. */
|
|
|
|
|
const float brightness = bcmd->bright / 100.0f;
|
|
|
|
|
const float contrast = bcmd->contrast;
|
|
|
|
|
float delta = contrast / 200.0f;
|
|
|
|
|
|
|
|
|
|
if (contrast > 0) {
|
|
|
|
|
op.mul = 1.0f - delta * 2.0f;
|
|
|
|
|
op.mul = 1.0f / max_ff(op.mul, FLT_EPSILON);
|
|
|
|
|
op.add = op.mul * (brightness - delta);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
delta *= -1;
|
|
|
|
|
op.mul = max_ff(1.0f - delta * 2.0f, 0.0f);
|
|
|
|
|
op.add = op.mul * brightness + delta;
|
|
|
|
|
}
|
2012-08-24 09:07:04 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
apply_modifier_op(op, ibuf, mask);
|
2012-08-24 09:07:04 +00:00
|
|
|
}
|
|
|
|
|
|
2020-06-16 12:32:42 +10:00
|
|
|
/** \} */
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name Mask Modifier
|
|
|
|
|
* \{ */
|
2013-05-08 14:20:57 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
static float load_mask_min(const uchar *&mask)
|
2013-05-08 14:20:57 +00:00
|
|
|
{
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
float m = float(min_iii(mask[0], mask[1], mask[2])) * (1.0f / 255.0f);
|
|
|
|
|
mask += 4;
|
|
|
|
|
return m;
|
|
|
|
|
}
|
|
|
|
|
static float load_mask_min(const float *&mask)
|
|
|
|
|
{
|
|
|
|
|
float m = min_fff(mask[0], mask[1], mask[2]);
|
|
|
|
|
mask += 4;
|
|
|
|
|
return m;
|
|
|
|
|
}
|
|
|
|
|
/**
 * Overload for an untyped (`void`) mask pointer: always yields a weight of
 * one and leaves the pointer untouched.
 * NOTE(review): presumably this is the "no mask" case of the modifier ops; confirm against callers.
 */
static float load_mask_min(const void *& /*mask*/)
{
  constexpr float full_weight = 1.0f;
  return full_weight;
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
struct MaskApplyOp {
|
|
|
|
|
template<typename ImageT, typename MaskT>
|
|
|
|
|
void apply(ImageT *image, const MaskT *mask, IndexRange size)
|
|
|
|
|
{
|
|
|
|
|
for ([[maybe_unused]] int64_t i : size) {
|
|
|
|
|
float m = load_mask_min(mask);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
if constexpr (std::is_same_v<ImageT, uchar>) {
|
|
|
|
|
/* Byte buffer is straight, so only affect on alpha itself, this is
|
|
|
|
|
* the only way to alpha-over byte strip after applying mask modifier. */
|
|
|
|
|
image[3] = uchar(image[3] * m);
|
2013-05-08 14:20:57 +00:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
else if constexpr (std::is_same_v<ImageT, float>) {
|
2024-12-06 18:43:52 +01:00
|
|
|
/* Float buffers are premultiplied, so need to premul color as well to make it
|
|
|
|
|
* easy to alpha-over masked strip. */
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
float4 pix(image);
|
|
|
|
|
pix *= m;
|
|
|
|
|
*reinterpret_cast<float4 *>(image) = pix;
|
2013-05-08 14:20:57 +00:00
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
image += 4;
|
2013-05-08 14:20:57 +00:00
|
|
|
}
|
|
|
|
|
}
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
};
|
2013-05-08 14:20:57 +00:00
|
|
|
|
2024-09-11 12:42:03 +02:00
|
|
|
static void maskmodifier_apply(const StripScreenQuad & /*quad*/,
|
2025-05-01 00:22:04 +02:00
|
|
|
StripModifierData * /*smd*/,
|
2024-09-11 12:42:03 +02:00
|
|
|
ImBuf *ibuf,
|
|
|
|
|
ImBuf *mask)
|
2013-05-08 14:20:57 +00:00
|
|
|
{
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
if (mask == nullptr || (mask->byte_buffer.data == nullptr && mask->float_buffer.data == nullptr))
|
|
|
|
|
{
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MaskApplyOp op;
|
|
|
|
|
apply_modifier_op(op, ibuf, mask);
|
2013-05-08 14:20:57 +00:00
|
|
|
|
VSE: Optimize and cleanup modifiers
Originally intended to be a code cleanup that makes the code shorter
(part of VSE quality project #130975), but as a side effect many
modifiers are now faster since they no longer do many branches in
the innermost pixel loop.
Main part is having apply_modifier_op that given the "modifier op"
functor object, instantiates the correct processing function based
on type of image (byte vs float) and mask (none, byte, float), for
a total of 6 possible cases. And then a helper like
apply_and_advance_mask that applies mask based on input and result
in a consistent and not "literal copy paste of code" way across the
modifiers.
Brightness/Contrast, Color Balance, Tonemap modifiers were already
optimized to move branches out of inner loops previously; their
performance remains unchanged. Mask modifier performance remains
unchanged; it is very simple and memory bandwidth limited on my
machine.
Other modifiers, tested on 4K resolution, Win10 / Ryzen 5950X, time
in milliseconds taken to apply the modifier calculation, on a byte
image with no mask:
- Curves: 12.1 -> 7.7ms
- Hue Correct: 24.5 -> 15.8ms
- White Balance: 20.5 -> 13.8ms
Same as above, but on a float image with a byte mask:
- Curves: 13.5 -> 12.3ms
- Hue Correct: 19.7 -> 16.4ms
- White Balance: 19.3 -> 15.9ms
Pull Request: https://projects.blender.org/blender/blender/pulls/131736
2024-12-16 09:32:37 +01:00
|
|
|
/* Image has gained transparency. */
|
2021-12-06 15:20:15 +01:00
|
|
|
ibuf->planes = R_IMF_PLANES_RGBA;
|
2013-05-08 14:20:57 +00:00
|
|
|
}
|
|
|
|
|
|
2020-06-16 12:32:42 +10:00
|
|
|
/** \} */
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name Tonemap Modifier
|
|
|
|
|
* \{ */
|
2016-01-19 15:53:43 +01:00
|
|
|
|
2023-07-20 09:46:24 +02:00
|
|
|
struct AvgLogLum {
|
2024-09-19 18:14:49 +02:00
|
|
|
const SequencerTonemapModifierData *tmmd;
|
2016-01-19 15:53:43 +01:00
|
|
|
float al;
|
|
|
|
|
float auto_key;
|
|
|
|
|
float lav;
|
2024-09-19 18:14:49 +02:00
|
|
|
float3 cav;
|
2016-01-19 15:53:43 +01:00
|
|
|
float igm;
|
2023-07-20 09:46:24 +02:00
|
|
|
};
|
2016-01-19 15:53:43 +01:00
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/**
 * Fill a freshly created tone-map modifier with its default settings.
 * Defaults are the same as the tone-map compositor node.
 */
static void tonemapmodifier_init_data(StripModifierData *smd)
{
  SequencerTonemapModifierData *tonemap = reinterpret_cast<SequencerTonemapModifierData *>(smd);

  tonemap->type = SEQ_TONEMAP_RD_PHOTORECEPTOR;

  /* Settings read by the simple operator. */
  tonemap->key = 0.18f;
  tonemap->offset = 1.0f;
  tonemap->gamma = 1.0f;

  /* Settings read by the photoreceptor operator. */
  tonemap->intensity = 0.0f;
  tonemap->contrast = 0.0f;
  tonemap->adaptation = 1.0f;
  tonemap->correction = 0.0f;
}
|
|
|
|
|
|
2024-09-19 18:14:49 +02:00
|
|
|
/* Convert chunk of float image pixels to scene linear space, in-place. */
|
Refactor: OpenColorIO integration
Briefly about this change:
- OpenColorIO C-API is removed.
- The information about color spaces in ImBuf module is removed.
It was stored in global ListBase in colormanagement.cc.
- Both OpenColorIO and fallback implementation supports GPU drawing.
- Fallback implementation supports white point, RGB curves, etc.
- Removed check for support of GPU drawing in IMB.
Historically it was implemented in a separate library with C-API, this
is because way back C++ code needed to stay in intern. This causes all
sort of overheads, and even calls that are strictly considered bad
level.
This change moves OpenColorIO integration into a module within imbuf,
next to movie, and next to IMB_colormanagement which is the main user
of it. This allows to avoid copy of color spaces, displays, views etc
in the ImBuf: they were used to help quickly querying information to
be shown on the interface. With this change it can be stored in the
same data structures as what is used by the OpenColorIO integration.
While it might not be fully avoiding duplication it is now less, and
there is no need in the user code to maintain the copies.
In a lot of cases this change also avoids allocations done per access
to the OpenColorIO. For example, it is not needed anymore to allocate
image descriptor in a heap.
The bigger user-visible change is that the fallback implementation now
supports GLSL drawing, with the whole list of supported features, such
as curve mapping and white point. This should help simplifying code
which relies on color space conversion on GPU: there is no need to
figure out fallback solution in such cases. The only case when drawing
will not work is when there is some actual bug, or driver issue, and
shader has failed to compile.
The change avoids having an opaque type for color space, and instead
uses forward declaration. It is a bit verbose on declaration, but helps
avoiding unsafe type-casts. There are ways to solve this in the future,
like having a header for forward declaration, or to flatten the name
space a bit.
There should be no user-level changes under normal operation.
When building without OpenColorIO or the configuration has a typo or
is missing a fuller set of color management tools is applies (such as the
white point correction).
Pull Request: https://projects.blender.org/blender/blender/pulls/138433
2025-05-09 14:01:43 +02:00
|
|
|
static void pixels_to_scene_linear_float(const ColorSpace *colorspace,
|
|
|
|
|
float4 *pixels,
|
|
|
|
|
int64_t count)
|
2016-01-19 15:53:43 +01:00
|
|
|
{
|
2024-09-19 18:14:49 +02:00
|
|
|
IMB_colormanagement_colorspace_to_scene_linear(
|
|
|
|
|
(float *)(pixels), int(count), 1, 4, colorspace, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Convert chunk of byte image pixels to scene linear space, into a destination array. */
|
Refactor: OpenColorIO integration
Briefly about this change:
- OpenColorIO C-API is removed.
- The information about color spaces in ImBuf module is removed.
It was stored in global ListBase in colormanagement.cc.
- Both OpenColorIO and fallback implementation supports GPU drawing.
- Fallback implementation supports white point, RGB curves, etc.
- Removed check for support of GPU drawing in IMB.
Historically it was implemented in a separate library with C-API, this
is because way back C++ code needed to stay in intern. This causes all
sort of overheads, and even calls that are strictly considered bad
level.
This change moves OpenColorIO integration into a module within imbuf,
next to movie, and next to IMB_colormanagement which is the main user
of it. This allows to avoid copy of color spaces, displays, views etc
in the ImBuf: they were used to help quickly querying information to
be shown on the interface. With this change it can be stored in the
same data structures as what is used by the OpenColorIO integration.
While it might not be fully avoiding duplication it is now less, and
there is no need in the user code to maintain the copies.
In a lot of cases this change also avoids allocations done per access
to the OpenColorIO. For example, it is not needed anymore to allocate
image descriptor in a heap.
The bigger user-visible change is that the fallback implementation now
supports GLSL drawing, with the whole list of supported features, such
as curve mapping and white point. This should help simplifying code
which relies on color space conversion on GPU: there is no need to
figure out fallback solution in such cases. The only case when drawing
will not work is when there is some actual bug, or driver issue, and
shader has failed to compile.
The change avoids having an opaque type for color space, and instead
uses forward declaration. It is a bit verbose on declaration, but helps
avoiding unsafe type-casts. There are ways to solve this in the future,
like having a header for forward declaration, or to flatten the name
space a bit.
There should be no user-level changes under normal operation.
When building without OpenColorIO or the configuration has a typo or
is missing a fuller set of color management tools is applies (such as the
white point correction).
Pull Request: https://projects.blender.org/blender/blender/pulls/138433
2025-05-09 14:01:43 +02:00
|
|
|
static void pixels_to_scene_linear_byte(const ColorSpace *colorspace,
|
2024-09-19 18:14:49 +02:00
|
|
|
const uchar *pixels,
|
|
|
|
|
float4 *dst,
|
|
|
|
|
int64_t count)
|
|
|
|
|
{
|
|
|
|
|
const uchar *bptr = pixels;
|
|
|
|
|
float4 *dst_ptr = dst;
|
|
|
|
|
for (int64_t i = 0; i < count; i++) {
|
|
|
|
|
straight_uchar_to_premul_float(*dst_ptr, bptr);
|
|
|
|
|
bptr += 4;
|
|
|
|
|
dst_ptr++;
|
|
|
|
|
}
|
|
|
|
|
IMB_colormanagement_colorspace_to_scene_linear(
|
|
|
|
|
(float *)dst, int(count), 1, 4, colorspace, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Convert the pixels of \a range in the float buffer of \a ibuf from scene linear
 * to the color space of that float buffer, in-place.
 */
static void scene_linear_to_image_chunk_float(ImBuf *ibuf, IndexRange range)
{
  float4 *all_pixels = reinterpret_cast<float4 *>(ibuf->float_buffer.data);
  float4 *chunk_begin = all_pixels + range.first();
  const int chunk_len = int(range.size());

  IMB_colormanagement_scene_linear_to_colorspace(
      (float *)(chunk_begin), chunk_len, 1, 4, ibuf->float_buffer.colorspace);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Write a chunk of scene linear pixels back into the byte buffer of \a ibuf.
 *
 * \param src: Scene linear pixels of this chunk only (element 0 belongs to `range.first()`).
 * They are converted to the byte buffer color space in-place, so the contents are modified.
 * \param range: Absolute pixel indices in \a ibuf that receive the result.
 */
static void scene_linear_to_image_chunk_byte(float4 *src, ImBuf *ibuf, IndexRange range)
{
  const int64_t chunk_len = range.size();
  IMB_colormanagement_scene_linear_to_colorspace(
      (float *)src, int(chunk_len), 1, 4, ibuf->byte_buffer.colorspace);

  /* Premultiplied float -> straight-alpha bytes, written at the chunk's position. */
  const float4 *converted = src;
  uchar *dst_bytes = ibuf->byte_buffer.data + range.first() * 4;
  for (int64_t i = 0; i < chunk_len; i++) {
    premul_float_to_straight_uchar(dst_bytes + i * 4, converted[i]);
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Simple tone-mapping operator, applied in-place to one chunk of scene linear pixels.
 *
 * \param scene_linear: Pixels of this chunk only; element 0 belongs to `range.first()`.
 * \param mask: Optional mask image (may be null), sampled at absolute pixel indices.
 * \param range: Absolute pixel indices covered by the chunk.
 * \param avg: Precomputed scale (`al`), inverse gamma (`igm`) and modifier settings.
 *
 * Alpha is passed through unchanged.
 */
static void tonemap_simple(float4 *scene_linear,
                           ImBuf *mask,
                           IndexRange range,
                           const AvgLogLum &avg)
{
  const float4 *mask_float = nullptr;
  const uchar4 *mask_byte = nullptr;
  if (mask != nullptr) {
    mask_float = (const float4 *)mask->float_buffer.data;
    mask_byte = (const uchar4 *)mask->byte_buffer.data;
  }

  const float inv_gamma = avg.igm;
  const int64_t chunk_len = range.size();

  for (int64_t i = 0; i < chunk_len; i++) {
    const int64_t pixel_index = range.first() + i;
    float4 input = scene_linear[i];

    /* Scale, then divide each channel by (itself + offset), skipping zero divisors. */
    float3 pixel = input.xyz() * avg.al;
    float3 divisor = pixel + avg.tmmd->offset;
    pixel.x /= (divisor.x == 0.0f) ? 1.0f : divisor.x;
    pixel.y /= (divisor.y == 0.0f) ? 1.0f : divisor.y;
    pixel.z /= (divisor.z == 0.0f) ? 1.0f : divisor.z;

    /* Gamma; negative values are clamped to zero before the power. */
    if (inv_gamma != 0.0f) {
      pixel.x = powf(math::max(pixel.x, 0.0f), inv_gamma);
      pixel.y = powf(math::max(pixel.y, 0.0f), inv_gamma);
      pixel.z = powf(math::max(pixel.z, 0.0f), inv_gamma);
    }

    /* Blend between the input and the tone-mapped color by the mask color. */
    if (mask != nullptr) {
      float3 mask_color(1.0f);
      if (mask_byte != nullptr) {
        rgb_uchar_to_float(mask_color, mask_byte[pixel_index]);
      }
      else if (mask_float != nullptr) {
        mask_color = mask_float[pixel_index].xyz();
      }
      pixel = math::interpolate(input.xyz(), pixel, mask_color);
    }

    scene_linear[i] = float4(pixel.x, pixel.y, pixel.z, input.w);
  }
}
|
|
|
|
|
|
2024-09-19 18:14:49 +02:00
|
|
|
/**
 * Photoreceptor tone-mapping operator, applied in-place to one chunk of scene linear pixels.
 *
 * \param scene_linear: Pixels of this chunk only; element 0 belongs to `range.first()`.
 * \param mask: Optional mask image (may be null), sampled at absolute pixel indices.
 * \param range: Absolute pixel indices covered by the chunk.
 * \param avg: Image-wide averages (`lav`, `cav`, `auto_key`) and modifier settings.
 *
 * Alpha is passed through unchanged.
 */
static void tonemap_rd_photoreceptor(float4 *scene_linear,
                                     ImBuf *mask,
                                     IndexRange range,
                                     const AvgLogLum &avg)
{
  const float4 *mask_float = nullptr;
  const uchar4 *mask_byte = nullptr;
  if (mask != nullptr) {
    mask_float = (const float4 *)mask->float_buffer.data;
    mask_byte = (const uchar4 *)mask->byte_buffer.data;
  }

  const float f = expf(-avg.tmmd->intensity);
  /* A positive contrast setting is used as-is, otherwise the exponent comes from the auto key. */
  const float m = (avg.tmmd->contrast > 0.0f) ? avg.tmmd->contrast :
                                                (0.3f + 0.7f * powf(avg.auto_key, 1.4f));
  const float ic = 1.0f - avg.tmmd->correction;
  const float ia = 1.0f - avg.tmmd->adaptation;

  /* Tone-map one channel: mix channel/luminance (local) with the image averages (global),
   * then divide the channel by itself plus the adapted level raised to `m`. */
  auto map_channel = [&](const float value, const float channel_average, const float L) {
    const float I_l = value + ic * (L - value);
    const float I_g = channel_average + ic * (avg.lav - channel_average);
    const float I_a = I_l + ia * (I_g - I_l);
    return value / std::max(value + powf(f * I_a, m), 1.0e-30f);
  };

  const int64_t chunk_len = range.size();
  for (int64_t i = 0; i < chunk_len; i++) {
    const int64_t pixel_index = range.first() + i;
    float4 input = scene_linear[i];

    /* Apply correction. Luminance is taken once, from the unmodified input color. */
    float3 pixel = input.xyz();
    const float L = IMB_colormanagement_get_luminance(pixel);
    pixel.x = map_channel(pixel.x, avg.cav.x, L);
    pixel.y = map_channel(pixel.y, avg.cav.y, L);
    pixel.z = map_channel(pixel.z, avg.cav.z, L);

    /* Blend between the input and the tone-mapped color by the mask color. */
    if (mask != nullptr) {
      float3 mask_color(1.0f);
      if (mask_byte != nullptr) {
        rgb_uchar_to_float(mask_color, mask_byte[pixel_index]);
      }
      else if (mask_float != nullptr) {
        mask_color = mask_float[pixel_index].xyz();
      }
      pixel = math::interpolate(input.xyz(), pixel, mask_color);
    }

    scene_linear[i] = float4(pixel.x, pixel.y, pixel.z, input.w);
  }
}
|
|
|
|
|
|
2024-09-11 12:42:03 +02:00
|
|
|
static bool is_point_inside_quad(const StripScreenQuad &quad, int x, int y)
|
|
|
|
|
{
|
2024-09-26 10:17:04 +03:00
|
|
|
float2 pt(x + 0.5f, y + 0.5f);
|
2024-09-11 12:42:03 +02:00
|
|
|
return isect_point_quad_v2(pt, quad.v0, quad.v1, quad.v2, quad.v3);
|
|
|
|
|
}
|
|
|
|
|
|
2024-09-19 18:14:49 +02:00
|
|
|
struct AreaLuminance {
|
|
|
|
|
int64_t pixel_count = 0;
|
|
|
|
|
double sum = 0.0f;
|
|
|
|
|
float3 color_sum = {0, 0, 0};
|
|
|
|
|
double log_sum = 0.0;
|
|
|
|
|
float min = FLT_MAX;
|
|
|
|
|
float max = -FLT_MAX;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static void tonemap_calc_chunk_luminance(const StripScreenQuad &quad,
|
|
|
|
|
const bool all_pixels_inside_quad,
|
|
|
|
|
const int width,
|
|
|
|
|
const IndexRange y_range,
|
|
|
|
|
const float4 *scene_linear,
|
|
|
|
|
AreaLuminance &r_lum)
|
2016-01-19 15:53:43 +01:00
|
|
|
{
|
2024-09-19 18:14:49 +02:00
|
|
|
for (const int y : y_range) {
|
|
|
|
|
for (int x = 0; x < width; x++) {
|
|
|
|
|
if (all_pixels_inside_quad || is_point_inside_quad(quad, x, y)) {
|
|
|
|
|
float4 pixel = *scene_linear;
|
|
|
|
|
r_lum.pixel_count++;
|
|
|
|
|
float L = IMB_colormanagement_get_luminance(pixel);
|
|
|
|
|
r_lum.sum += L;
|
|
|
|
|
r_lum.color_sum.x += pixel.x;
|
|
|
|
|
r_lum.color_sum.y += pixel.y;
|
|
|
|
|
r_lum.color_sum.z += pixel.z;
|
|
|
|
|
r_lum.log_sum += logf(math::max(L, 0.0f) + 1e-5f);
|
|
|
|
|
r_lum.max = math::max(r_lum.max, L);
|
|
|
|
|
r_lum.min = math::min(r_lum.min, L);
|
|
|
|
|
}
|
|
|
|
|
scene_linear++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-09-11 12:42:03 +02:00
|
|
|
|
2024-09-19 18:14:49 +02:00
|
|
|
/**
 * Measure luminance statistics of \a ibuf over the pixels covered by \a quad.
 *
 * Side effect: when the image has a float buffer, that buffer is converted to
 * scene linear space in-place, so the rest of the tone-mapper can continue with
 * scene linear values. A byte buffer is left untouched (a temporary float copy
 * of each chunk is measured instead).
 */
static AreaLuminance tonemap_calc_input_luminance(const StripScreenQuad &quad, const ImBuf *ibuf)
{
  /* Pixels outside the pre-transform strip area are ignored for luminance calculations.
   * If all four image corners are inside the strip area, every pixel is accepted without
   * testing it individually. */
  const bool all_pixels_inside_quad = is_point_inside_quad(quad, 0, 0) &&
                                      is_point_inside_quad(quad, ibuf->x - 1, 0) &&
                                      is_point_inside_quad(quad, 0, ibuf->y - 1) &&
                                      is_point_inside_quad(quad, ibuf->x - 1, ibuf->y - 1);

  /* Measure one chunk of rows, starting from the statistics passed in. */
  auto measure_rows = [&](const IndexRange y_range, const AreaLuminance &init) {
    AreaLuminance chunk_lum = init;
    const int64_t chunk_size = y_range.size() * ibuf->x;

    if (ibuf->float_buffer.data != nullptr) {
      /* Float image: convert the rows in place and measure them directly. */
      float4 *rows = reinterpret_cast<float4 *>(ibuf->float_buffer.data);
      rows += y_range.first() * ibuf->x;
      pixels_to_scene_linear_float(ibuf->float_buffer.colorspace, rows, chunk_size);
      tonemap_calc_chunk_luminance(
          quad, all_pixels_inside_quad, ibuf->x, y_range, rows, chunk_lum);
    }
    else {
      /* Byte image: measure a temporary scene linear copy of the rows. */
      const uchar *rows = ibuf->byte_buffer.data + y_range.first() * ibuf->x * 4;
      Array<float4> scene_linear(chunk_size);
      pixels_to_scene_linear_byte(
          ibuf->byte_buffer.colorspace, rows, scene_linear.data(), chunk_size);
      tonemap_calc_chunk_luminance(
          quad, all_pixels_inside_quad, ibuf->x, y_range, scene_linear.data(), chunk_lum);
    }
    return chunk_lum;
  };

  /* Combine the statistics of two pixel sets. */
  auto merge = [&](const AreaLuminance &a, const AreaLuminance &b) {
    AreaLuminance merged;
    merged.pixel_count = a.pixel_count + b.pixel_count;
    merged.sum = a.sum + b.sum;
    merged.color_sum = a.color_sum + b.color_sum;
    merged.log_sum = a.log_sum + b.log_sum;
    merged.min = math::min(a.min, b.min);
    merged.max = math::max(a.max, b.max);
    return merged;
  };

  const AreaLuminance empty;
  return threading::parallel_reduce(IndexRange(ibuf->y), 32, empty, measure_rows, merge);
}
|
|
|
|
|
|
|
|
|
|
static void tonemapmodifier_apply(const StripScreenQuad &quad,
|
2025-05-01 00:22:04 +02:00
|
|
|
StripModifierData *smd,
|
2024-09-19 18:14:49 +02:00
|
|
|
ImBuf *ibuf,
|
|
|
|
|
ImBuf *mask)
|
|
|
|
|
{
|
|
|
|
|
const SequencerTonemapModifierData *tmmd = (const SequencerTonemapModifierData *)smd;
|
|
|
|
|
|
|
|
|
|
AreaLuminance lum = tonemap_calc_input_luminance(quad, ibuf);
|
|
|
|
|
if (lum.pixel_count == 0) {
|
2024-09-11 12:42:03 +02:00
|
|
|
return; /* Strip is zero size or off-screen. */
|
|
|
|
|
}
|
2024-09-19 18:14:49 +02:00
|
|
|
|
|
|
|
|
AvgLogLum data;
|
|
|
|
|
data.tmmd = tmmd;
|
|
|
|
|
data.lav = lum.sum / lum.pixel_count;
|
|
|
|
|
data.cav.x = lum.color_sum.x / lum.pixel_count;
|
|
|
|
|
data.cav.y = lum.color_sum.y / lum.pixel_count;
|
|
|
|
|
data.cav.z = lum.color_sum.z / lum.pixel_count;
|
|
|
|
|
float maxl = log(double(lum.max) + 1e-5f);
|
|
|
|
|
float minl = log(double(lum.min) + 1e-5f);
|
|
|
|
|
float avl = lum.log_sum / lum.pixel_count;
|
2016-01-19 15:53:43 +01:00
|
|
|
data.auto_key = (maxl > minl) ? ((maxl - avl) / (maxl - minl)) : 1.0f;
|
2024-09-19 18:14:49 +02:00
|
|
|
float al = exp(double(avl));
|
2016-01-19 15:53:43 +01:00
|
|
|
data.al = (al == 0.0f) ? 0.0f : (tmmd->key / al);
|
|
|
|
|
data.igm = (tmmd->gamma == 0.0f) ? 1.0f : (1.0f / tmmd->gamma);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-09-19 18:14:49 +02:00
|
|
|
threading::parallel_for(
|
|
|
|
|
IndexRange(int64_t(ibuf->x) * ibuf->y), 64 * 1024, [&](IndexRange range) {
|
|
|
|
|
if (ibuf->float_buffer.data != nullptr) {
|
|
|
|
|
/* Float pixels: no need for temporary storage. Luminance calculation already converted
|
|
|
|
|
* data to scene linear. */
|
|
|
|
|
float4 *pixels = (float4 *)(ibuf->float_buffer.data) + range.first();
|
|
|
|
|
if (tmmd->type == SEQ_TONEMAP_RD_PHOTORECEPTOR) {
|
|
|
|
|
tonemap_rd_photoreceptor(pixels, mask, range, data);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
BLI_assert(tmmd->type == SEQ_TONEMAP_RH_SIMPLE);
|
|
|
|
|
tonemap_simple(pixels, mask, range, data);
|
|
|
|
|
}
|
|
|
|
|
scene_linear_to_image_chunk_float(ibuf, range);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* Byte pixels: temporary storage for scene linear pixel values. */
|
|
|
|
|
Array<float4> scene_linear(range.size());
|
|
|
|
|
pixels_to_scene_linear_byte(ibuf->byte_buffer.colorspace,
|
|
|
|
|
ibuf->byte_buffer.data + range.first() * 4,
|
|
|
|
|
scene_linear.data(),
|
|
|
|
|
range.size());
|
|
|
|
|
if (tmmd->type == SEQ_TONEMAP_RD_PHOTORECEPTOR) {
|
|
|
|
|
tonemap_rd_photoreceptor(scene_linear.data(), mask, range, data);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
BLI_assert(tmmd->type == SEQ_TONEMAP_RH_SIMPLE);
|
|
|
|
|
tonemap_simple(scene_linear.data(), mask, range, data);
|
|
|
|
|
}
|
|
|
|
|
scene_linear_to_image_chunk_byte(scene_linear.data(), ibuf, range);
|
|
|
|
|
}
|
|
|
|
|
});
|
2016-01-19 15:53:43 +01:00
|
|
|
}
|
|
|
|
|
|
2020-06-16 12:32:42 +10:00
|
|
|
/** \} */
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name Public Modifier Functions
|
|
|
|
|
* \{ */
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2025-03-06 13:04:39 +01:00
|
|
|
/**
 * Type information for every strip modifier, indexed by modifier type value.
 * Looked up through #modifier_type_info_get, which rejects index 0 and out-of-range values.
 */
static StripModifierTypeInfo modifiersTypes[NUM_SEQUENCE_MODIFIER_TYPES] = {
    /* Index 0: unused. */
    {},
    /* Index 1. */
    {
        /*name*/ CTX_N_(BLT_I18NCONTEXT_ID_SEQUENCE, "Color Balance"),
        /*struct_name*/ "ColorBalanceModifierData",
        /*struct_size*/ sizeof(ColorBalanceModifierData),
        /*init_data*/ colorBalance_init_data,
        /*free_data*/ nullptr,
        /*copy_data*/ nullptr,
        /*apply*/ colorBalance_apply,
    },
    /* Index 2. */
    {
        /*name*/ CTX_N_(BLT_I18NCONTEXT_ID_SEQUENCE, "Curves"),
        /*struct_name*/ "CurvesModifierData",
        /*struct_size*/ sizeof(CurvesModifierData),
        /*init_data*/ curves_init_data,
        /*free_data*/ curves_free_data,
        /*copy_data*/ curves_copy_data,
        /*apply*/ curves_apply,
    },
    /* Index 3. */
    {
        /*name*/ CTX_N_(BLT_I18NCONTEXT_ID_SEQUENCE, "Hue Correct"),
        /*struct_name*/ "HueCorrectModifierData",
        /*struct_size*/ sizeof(HueCorrectModifierData),
        /*init_data*/ hue_correct_init_data,
        /*free_data*/ hue_correct_free_data,
        /*copy_data*/ hue_correct_copy_data,
        /*apply*/ hue_correct_apply,
    },
    /* Index 4. */
    {
        /*name*/ CTX_N_(BLT_I18NCONTEXT_ID_SEQUENCE, "Brightness/Contrast"),
        /*struct_name*/ "BrightContrastModifierData",
        /*struct_size*/ sizeof(BrightContrastModifierData),
        /*init_data*/ nullptr,
        /*free_data*/ nullptr,
        /*copy_data*/ nullptr,
        /*apply*/ brightcontrast_apply,
    },
    /* Index 5. */
    {
        /*name*/ CTX_N_(BLT_I18NCONTEXT_ID_SEQUENCE, "Mask"),
        /*struct_name*/ "SequencerMaskModifierData",
        /*struct_size*/ sizeof(SequencerMaskModifierData),
        /*init_data*/ nullptr,
        /*free_data*/ nullptr,
        /*copy_data*/ nullptr,
        /*apply*/ maskmodifier_apply,
    },
    /* Index 6. */
    {
        /*name*/ CTX_N_(BLT_I18NCONTEXT_ID_SEQUENCE, "White Balance"),
        /*struct_name*/ "WhiteBalanceModifierData",
        /*struct_size*/ sizeof(WhiteBalanceModifierData),
        /*init_data*/ whiteBalance_init_data,
        /*free_data*/ nullptr,
        /*copy_data*/ nullptr,
        /*apply*/ whiteBalance_apply,
    },
    /* Index 7. */
    {
        /*name*/ CTX_N_(BLT_I18NCONTEXT_ID_SEQUENCE, "Tonemap"),
        /*struct_name*/ "SequencerTonemapModifierData",
        /*struct_size*/ sizeof(SequencerTonemapModifierData),
        /*init_data*/ tonemapmodifier_init_data,
        /*free_data*/ nullptr,
        /*copy_data*/ nullptr,
        /*apply*/ tonemapmodifier_apply,
    },
    /* Index 8: the only entry without an image `apply` callback. */
    {
        /*name*/ CTX_N_(BLT_I18NCONTEXT_ID_SEQUENCE, "Equalizer"),
        /*struct_name*/ "SoundEqualizerModifierData",
        /*struct_size*/ sizeof(SoundEqualizerModifierData),
        /*init_data*/ sound_equalizermodifier_init_data,
        /*free_data*/ sound_equalizermodifier_free,
        /*copy_data*/ sound_equalizermodifier_copy_data,
        /*apply*/ nullptr,
    },
};
|
2012-08-19 15:41:56 +00:00
|
|
|
|
2025-03-06 13:04:39 +01:00
|
|
|
/**
 * Look up the type information of a modifier type.
 *
 * \return The table entry for \a type, or null when \a type is zero, negative,
 * or not below #NUM_SEQUENCE_MODIFIER_TYPES.
 */
const StripModifierTypeInfo *modifier_type_info_get(int type)
{
  const bool is_known_type = (type > 0) && (type < NUM_SEQUENCE_MODIFIER_TYPES);
  return is_known_type ? &modifiersTypes[type] : nullptr;
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/**
 * Create a new modifier of the given type and append it to the modifiers of \a strip.
 *
 * The modifier is allocated zero-initialized with the size registered for \a type,
 * flagged as expanded, named, made unique within the strip, and finally passed to
 * the type's `init_data` callback when there is one.
 *
 * \param name: Name for the modifier; when null or empty the type's name is used instead.
 * \param type: Modifier type value, see #modifier_type_info_get.
 * \return The new modifier, or null when \a type is not a known modifier type
 * (nothing is allocated or added to the strip in that case).
 */
StripModifierData *modifier_new(Strip *strip, const char *name, int type)
{
  const StripModifierTypeInfo *smti = modifier_type_info_get(type);
  if (smti == nullptr) {
    /* Unknown type: there is no struct size to allocate, so refuse instead of
     * dereferencing a null type info. */
    return nullptr;
  }

  StripModifierData *smd = static_cast<StripModifierData *>(
      MEM_callocN(smti->struct_size, "sequence modifier"));

  smd->type = type;
  smd->flag |= SEQUENCE_MODIFIER_EXPANDED;

  if (!name || !name[0]) {
    STRNCPY(smd->name, smti->name);
  }
  else {
    STRNCPY(smd->name, name);
  }

  /* Add to the strip before making the name unique. */
  BLI_addtail(&strip->modifiers, smd);

  modifier_unique_name(strip, smd);

  if (smti->init_data) {
    smti->init_data(smd);
  }

  return smd;
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/**
 * Unlink \a smd from the strip's modifier list and free it.
 *
 * \return True when the modifier was found in `strip->modifiers` and removed, false when it is
 * not part of that list (nothing is freed in that case).
 */
bool modifier_remove(Strip *strip, StripModifierData *smd)
{
  const bool is_member = BLI_findindex(&strip->modifiers, smd) != -1;

  if (is_member) {
    BLI_remlink(&strip->modifiers, smd);
    modifier_free(smd);
  }

  return is_member;
}
|
|
|
|
|
|
2025-03-06 13:04:39 +01:00
|
|
|
/**
 * Free every modifier in the strip's modifier list and reset the list to empty.
 */
void modifier_clear(Strip *strip)
{
  StripModifierData *current = static_cast<StripModifierData *>(strip->modifiers.first);

  while (current != nullptr) {
    /* Read the successor before freeing, the link is gone afterwards. */
    StripModifierData *following = current->next;
    modifier_free(current);
    current = following;
  }

  BLI_listbase_clear(&strip->modifiers);
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/**
 * Free a single modifier: run the type's `free_data` callback when the type is known and
 * provides one, then release the modifier's own allocation.
 *
 * The modifier is not unlinked from any list here.
 */
void modifier_free(StripModifierData *smd)
{
  if (const StripModifierTypeInfo *type_info = modifier_type_info_get(smd->type)) {
    if (type_info->free_data) {
      type_info->free_data(smd);
    }
  }

  MEM_freeN(smd);
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/**
 * Make the name of \a smd unique among the modifiers of \a strip.
 *
 * The default name handed to #BLI_uniquename is the translated name of the modifier type.
 * When the type is unknown (#modifier_type_info_get returns nullptr) the generic
 * "Strip Modifier" default is used instead of dereferencing a null type info.
 */
void modifier_unique_name(Strip *strip, StripModifierData *smd)
{
  const StripModifierTypeInfo *smti = modifier_type_info_get(smd->type);

  const char *default_name = (smti != nullptr) ?
                                 CTX_DATA_(BLT_I18NCONTEXT_ID_SEQUENCE, smti->name) :
                                 "Strip Modifier";

  BLI_uniquename(&strip->modifiers,
                 smd,
                 default_name,
                 '.',
                 offsetof(StripModifierData, name),
                 sizeof(smd->name));
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/**
 * Search the strip's modifier list for an entry whose `name` field matches \a name.
 *
 * \return The result of #BLI_findstring on `strip->modifiers`, cast to a modifier pointer.
 */
StripModifierData *modifier_find_by_name(Strip *strip, const char *name)
{
  void *match = BLI_findstring(&(strip->modifiers), name, offsetof(StripModifierData, name));
  return static_cast<StripModifierData *>(match);
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/**
 * Decide whether a modifier should be skipped at \a timeline_frame because of its mask strip.
 *
 * \return False when the modifier has no mask strip. Otherwise true when either:
 * - the mask input is a strip, mask time is relative, and the mask strip does not intersect
 *   \a timeline_frame, or
 * - the mask strip has no valid data or its media is reported missing.
 */
static bool skip_modifier(Scene *scene, const StripModifierData *smd, int timeline_frame)
{
  using namespace blender::seq;

  if (!smd->mask_strip) {
    return false;
  }

  /* Only strip-type masks in relative time are checked against the timeline frame. */
  bool mask_strip_out_of_range = false;
  if (smd->mask_input_type == SEQUENCE_MASK_INPUT_STRIP &&
      smd->mask_time == SEQUENCE_MASK_TIME_RELATIVE)
  {
    mask_strip_out_of_range = !time_strip_intersects_frame(
        scene, smd->mask_strip, timeline_frame);
  }

  /* Media presence is only queried when the mask strip has valid data. */
  bool mask_data_unavailable = true;
  if (strip_has_valid_data(smd->mask_strip)) {
    mask_data_unavailable = media_presence_is_missing(scene, smd->mask_strip);
  }

  return mask_strip_out_of_range || mask_data_unavailable;
}
|
|
|
|
|
|
2025-03-06 13:04:39 +01:00
|
|
|
void modifier_apply_stack(const RenderData *context,
|
|
|
|
|
const Strip *strip,
|
|
|
|
|
ImBuf *ibuf,
|
|
|
|
|
int timeline_frame)
|
2012-08-19 15:41:56 +00:00
|
|
|
{
|
2025-01-07 14:09:45 +01:00
|
|
|
const StripScreenQuad quad = get_strip_screen_quad(context, strip);
|
2024-09-11 12:42:03 +02:00
|
|
|
|
2025-01-07 14:09:45 +01:00
|
|
|
if (strip->modifiers.first && (strip->flag & SEQ_USE_LINEAR_MODIFIERS)) {
|
2025-03-06 13:04:39 +01:00
|
|
|
render_imbuf_from_sequencer_space(context->scene, ibuf);
|
Color Management, Stage 2: Switch color pipeline to use OpenColorIO
Replace old color pipeline which was supporting linear/sRGB color spaces
only with OpenColorIO-based pipeline.
This introduces two configurable color spaces:
- Input color space for images and movie clips. This space is used to convert
images/movies from color space in which file is saved to Blender's linear
space (for float images, byte images are not internally converted, only input
space is stored for such images and used later).
This setting could be found in image/clip data block settings.
- Display color space which defines space in which particular display is working.
This settings could be found in scene's Color Management panel.
When render result is being displayed on the screen, apart from converting image
to display space, some additional conversions could happen.
This conversions are:
- View, which defines tone curve applying before display transformation.
These are different ways to view the image on the same display device.
For example it could be used to emulate film view on sRGB display.
- Exposure affects on image exposure before tone map is applied.
- Gamma is post-display gamma correction, could be used to match particular
display gamma.
- RGB curves are user-defined curves which are applying before display
transformation, could be used for different purposes.
All this settings by default are only applying on render result and does not
affect on other images. If some particular image needs to be affected by this
transformation, "View as Render" setting of image data block should be set to
truth. Movie clips are always affected by all display transformations.
This commit also introduces configurable color space in which sequencer is
working. This setting could be found in scene's Color Management panel and
it should be used if such stuff as grading needs to be done in color space
different from sRGB (i.e. when Film view on sRGB display is use, using VD16
space as sequencer's internal space would make grading working in space
which is close to the space using for display).
Some technical notes:
- Image buffer's float buffer is now always in linear space, even if it was
created from 16bit byte images.
- Space of byte buffer is stored in image buffer's rect_colorspace property.
- Profile of image buffer was removed since it's not longer meaningful.
- OpenGL and GLSL is supposed to always work in sRGB space. It is possible
to support other spaces, but it's quite large project which isn't so
much important.
- Legacy Color Management option disabled is emulated by using None display.
It could have some regressions, but there's no clear way to avoid them.
- If OpenColorIO is disabled on build time, it should make blender behaving
in the same way as previous release with color management enabled.
More details could be found at this page (more details would be added soon):
http://wiki.blender.org/index.php/Dev:Ref/Release_Notes/2.64/Color_Management
--
Thanks to Xavier Thomas, Lukas Toene for initial work on OpenColorIO
integration and to Brecht van Lommel for some further development and code/
usecase review!
2012-09-15 10:05:07 +00:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
LISTBASE_FOREACH (StripModifierData *, smd, &strip->modifiers) {
|
2025-03-06 13:04:39 +01:00
|
|
|
const StripModifierTypeInfo *smti = modifier_type_info_get(smd->type);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2012-08-19 15:41:56 +00:00
|
|
|
/* could happen if modifier is being removed or not exists in current version of blender */
|
2019-04-22 09:39:35 +10:00
|
|
|
if (!smti) {
|
2012-08-19 15:41:56 +00:00
|
|
|
continue;
|
2019-04-22 09:39:35 +10:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2012-08-19 15:41:56 +00:00
|
|
|
/* modifier is muted, do nothing */
|
2019-04-22 09:39:35 +10:00
|
|
|
if (smd->flag & SEQUENCE_MODIFIER_MUTE) {
|
2012-08-19 15:41:56 +00:00
|
|
|
continue;
|
2019-04-22 09:39:35 +10:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-05-07 19:37:24 +02:00
|
|
|
if (smti->apply && !skip_modifier(context->scene, smd, timeline_frame)) {
|
2016-01-25 11:16:49 +01:00
|
|
|
int frame_offset;
|
|
|
|
|
if (smd->mask_time == SEQUENCE_MASK_TIME_RELATIVE) {
|
2025-01-07 14:09:45 +01:00
|
|
|
frame_offset = strip->start;
|
2016-01-25 11:16:49 +01:00
|
|
|
}
|
2021-06-26 21:35:18 +10:00
|
|
|
else /* if (smd->mask_time == SEQUENCE_MASK_TIME_ABSOLUTE) */ {
|
2019-11-11 10:52:26 -03:00
|
|
|
frame_offset = smd->mask_id ? ((Mask *)smd->mask_id)->sfra : 0;
|
2016-01-25 11:16:49 +01:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-12-06 18:43:52 +01:00
|
|
|
ImBuf *mask = modifier_mask_get(smd, context, timeline_frame, frame_offset);
|
2024-09-11 12:42:03 +02:00
|
|
|
smti->apply(quad, smd, ibuf, mask);
|
2019-04-22 09:39:35 +10:00
|
|
|
if (mask) {
|
2012-08-19 15:41:56 +00:00
|
|
|
IMB_freeImBuf(mask);
|
2019-04-22 09:39:35 +10:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
}
|
2012-08-19 15:41:56 +00:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2025-01-07 14:09:45 +01:00
|
|
|
if (strip->modifiers.first && (strip->flag & SEQ_USE_LINEAR_MODIFIERS)) {
|
2024-09-09 14:55:24 +02:00
|
|
|
seq_imbuf_to_sequencer_space(context->scene, ibuf, false);
|
Color Management, Stage 2: Switch color pipeline to use OpenColorIO
Replace old color pipeline which was supporting linear/sRGB color spaces
only with OpenColorIO-based pipeline.
This introduces two configurable color spaces:
- Input color space for images and movie clips. This space is used to convert
images/movies from color space in which file is saved to Blender's linear
space (for float images, byte images are not internally converted, only input
space is stored for such images and used later).
This setting could be found in image/clip data block settings.
- Display color space which defines space in which particular display is working.
This settings could be found in scene's Color Management panel.
When render result is being displayed on the screen, apart from converting image
to display space, some additional conversions could happen.
This conversions are:
- View, which defines tone curve applying before display transformation.
These are different ways to view the image on the same display device.
For example it could be used to emulate film view on sRGB display.
- Exposure affects on image exposure before tone map is applied.
- Gamma is post-display gamma correction, could be used to match particular
display gamma.
- RGB curves are user-defined curves which are applying before display
transformation, could be used for different purposes.
All this settings by default are only applying on render result and does not
affect on other images. If some particular image needs to be affected by this
transformation, "View as Render" setting of image data block should be set to
truth. Movie clips are always affected by all display transformations.
This commit also introduces configurable color space in which sequencer is
working. This setting could be found in scene's Color Management panel and
it should be used if such stuff as grading needs to be done in color space
different from sRGB (i.e. when Film view on sRGB display is use, using VD16
space as sequencer's internal space would make grading working in space
which is close to the space using for display).
Some technical notes:
- Image buffer's float buffer is now always in linear space, even if it was
created from 16bit byte images.
- Space of byte buffer is stored in image buffer's rect_colorspace property.
- Profile of image buffer was removed since it's not longer meaningful.
- OpenGL and GLSL is supposed to always work in sRGB space. It is possible
to support other spaces, but it's quite large project which isn't so
much important.
- Legacy Color Management option disabled is emulated by using None display.
It could have some regressions, but there's no clear way to avoid them.
- If OpenColorIO is disabled on build time, it should make blender behaving
in the same way as previous release with color management enabled.
More details could be found at this page (more details would be added soon):
http://wiki.blender.org/index.php/Dev:Ref/Release_Notes/2.64/Color_Management
--
Thanks to Xavier Thomas, Lukas Toene for initial work on OpenColorIO
integration and to Brecht van Lommel for some further development and code/
usecase review!
2012-09-15 10:05:07 +00:00
|
|
|
}
|
2012-08-19 15:41:56 +00:00
|
|
|
}
|
|
|
|
|
|
2025-05-01 00:22:04 +02:00
|
|
|
/**
 * Duplicate every modifier of \a strip and append the copies to \a strip_new.
 *
 * Each copy starts as a raw duplicate of the source allocation; the type's `copy_data`
 * callback, when present, is then invoked with the copy and the source. Copies are appended
 * to `strip_new->modifiers` and their names are made unique within that list.
 */
void modifier_list_copy(Strip *strip_new, Strip *strip)
{
  LISTBASE_FOREACH (StripModifierData *, source, &strip->modifiers) {
    StripModifierData *duplicate = static_cast<StripModifierData *>(MEM_dupallocN(source));

    const StripModifierTypeInfo *type_info = modifier_type_info_get(source->type);
    if (type_info != nullptr && type_info->copy_data != nullptr) {
      type_info->copy_data(duplicate, source);
    }

    BLI_addtail(&strip_new->modifiers, duplicate);
    BLI_uniquename(&strip_new->modifiers,
                   duplicate,
                   "Strip Modifier",
                   '.',
                   offsetof(StripModifierData, name),
                   sizeof(StripModifierData::name));
  }
}
|
|
|
|
|
|
2025-03-06 13:04:39 +01:00
|
|
|
/**
 * Report whether modifiers are supported for \a strip.
 *
 * \return 0 when the strip type is #STRIP_TYPE_SOUND_RAM, 1 for every other type.
 */
int sequence_supports_modifiers(Strip *strip)
{
  const bool is_sound_strip = (strip->type == STRIP_TYPE_SOUND_RAM);
  return is_sound_strip ? 0 : 1;
}
|
2020-06-18 12:40:38 +10:00
|
|
|
|
|
|
|
|
/** \} */
|
2020-11-06 14:27:51 +01:00
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name .blend File I/O
|
|
|
|
|
* \{ */
|
|
|
|
|
|
2025-03-06 13:04:39 +01:00
|
|
|
/**
 * Write all modifiers in \a modbase to a .blend file.
 *
 * Modifiers with known type info are written using the type's `struct_name`, followed by any
 * type-specific data (curve mappings, equalizer graphs). Modifiers with no type info are
 * written as a plain #StripModifierData.
 */
void modifier_blend_write(BlendWriter *writer, ListBase *modbase)
{
  LISTBASE_FOREACH (StripModifierData *, smd, modbase) {
    const StripModifierTypeInfo *type_info = modifier_type_info_get(smd->type);

    if (type_info == nullptr) {
      BLO_write_struct(writer, StripModifierData, smd);
      continue;
    }

    BLO_write_struct_by_name(writer, type_info->struct_name, smd);

    switch (smd->type) {
      case seqModifierType_Curves: {
        CurvesModifierData *curves = reinterpret_cast<CurvesModifierData *>(smd);
        BKE_curvemapping_blend_write(writer, &curves->curve_mapping);
        break;
      }
      case seqModifierType_HueCorrect: {
        HueCorrectModifierData *hue = reinterpret_cast<HueCorrectModifierData *>(smd);
        BKE_curvemapping_blend_write(writer, &hue->curve_mapping);
        break;
      }
      case seqModifierType_SoundEqualizer: {
        SoundEqualizerModifierData *equalizer = reinterpret_cast<SoundEqualizerModifierData *>(
            smd);
        /* Every graph is written as its own struct followed by its curve mapping. */
        LISTBASE_FOREACH (EQCurveMappingData *, graph, &equalizer->graphics) {
          BLO_write_struct_by_name(writer, "EQCurveMappingData", graph);
          BKE_curvemapping_blend_write(writer, &graph->curve_mapping);
        }
        break;
      }
      default:
        break;
    }
  }
}
|
|
|
|
|
|
2025-03-06 13:04:39 +01:00
|
|
|
/**
 * Read a list of modifiers from a .blend file.
 *
 * After the list itself is read, each modifier's `mask_strip` pointer is read when set, then
 * the type-specific data (curve mappings, equalizer graphs) is read for the types that
 * carry it.
 */
void modifier_blend_read_data(BlendDataReader *reader, ListBase *lb)
{
  BLO_read_struct_list(reader, StripModifierData, lb);

  LISTBASE_FOREACH (StripModifierData *, smd, lb) {
    if (smd->mask_strip != nullptr) {
      BLO_read_struct(reader, Strip, &smd->mask_strip);
    }

    switch (smd->type) {
      case seqModifierType_Curves: {
        CurvesModifierData *curves = reinterpret_cast<CurvesModifierData *>(smd);
        BKE_curvemapping_blend_read(reader, &curves->curve_mapping);
        break;
      }
      case seqModifierType_HueCorrect: {
        HueCorrectModifierData *hue = reinterpret_cast<HueCorrectModifierData *>(smd);
        BKE_curvemapping_blend_read(reader, &hue->curve_mapping);
        break;
      }
      case seqModifierType_SoundEqualizer: {
        SoundEqualizerModifierData *equalizer = reinterpret_cast<SoundEqualizerModifierData *>(
            smd);
        /* The graph list is read first, then the curve mapping of each graph. */
        BLO_read_struct_list(reader, EQCurveMappingData, &equalizer->graphics);
        LISTBASE_FOREACH (EQCurveMappingData *, graph, &equalizer->graphics) {
          BKE_curvemapping_blend_read(reader, &graph->curve_mapping);
        }
        break;
      }
      default:
        break;
    }
  }
}
|
|
|
|
|
|
|
|
|
|
/** \} */
|
2025-03-06 06:22:14 +01:00
|
|
|
|
|
|
|
|
} // namespace blender::seq
|