2023-05-31 16:19:06 +02:00
|
|
|
/* SPDX-FileCopyrightText: 2001-2002 NaN Holding BV. All rights reserved.
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
* SPDX-FileCopyrightText: 2024 Blender Authors
|
2023-05-31 16:19:06 +02:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
2021-12-08 09:54:52 +01:00
|
|
|
|
|
|
|
|
/** \file
|
|
|
|
|
* \ingroup imbuf
|
|
|
|
|
*/
|
|
|
|
|
|
2021-12-11 09:48:43 +01:00
|
|
|
#include <type_traits>
|
|
|
|
|
|
2023-01-26 13:38:59 +01:00
|
|
|
#include "BLI_math_color_blend.h"
|
2024-01-15 16:38:41 +01:00
|
|
|
#include "BLI_math_interp.hh"
|
2023-02-06 21:25:45 +01:00
|
|
|
#include "BLI_math_matrix.hh"
|
Cleanup: reduce amount of math-related includes
Using ClangBuildAnalyzer on the whole Blender build, it was pointing
out that BLI_math.h is the heaviest "header hub" (i.e. non tiny file
that is included a lot).
However, there are very few (actually zero) source files in Blender
that need "all the math" (base, colors, vectors, matrices,
quaternions, intersection, interpolation, statistics, solvers and
time). A common use case is source files needing just vectors, or
just vectors & matrices, or just colors etc. Actually, 181 files
were including the whole math thing without needing it at all.
This change removes BLI_math.h completely, and instead in all the
places that need it, includes BLI_math_vector.h or BLI_math_color.h
and so on.
Change from that:
- BLI_math_color.h was included 1399 times -> now 408 (took 114.0sec
to parse -> now 36.3sec)
- BLI_simd.h 1403 -> 418 (109.7sec -> 34.9sec).
Full rebuild of Blender (Apple M1, Xcode, RelWithDebInfo) is not
affected much (342sec -> 334sec). Most of the benefit comes when
someone changes BLI_simd.h or BLI_math_color.h or similar files:
3x fewer files now need to be recompiled.
Pull Request #110944
2023-08-09 11:39:20 +03:00
|
|
|
#include "BLI_math_vector.h"
|
2021-12-08 09:54:52 +01:00
|
|
|
#include "BLI_rect.h"
|
2023-01-27 09:56:19 +01:00
|
|
|
#include "BLI_task.hh"
|
2021-12-08 09:54:52 +01:00
|
|
|
|
2024-01-18 22:50:23 +02:00
|
|
|
#include "IMB_imbuf.hh"
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
#include "IMB_interp.hh"
|
|
|
|
|
|
|
|
|
|
using blender::float4;
|
|
|
|
|
using blender::uchar4;
|
2021-12-08 09:54:52 +01:00
|
|
|
|
|
|
|
|
namespace blender::imbuf::transform {
|
|
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
struct TransformContext {
|
2021-12-08 09:54:52 +01:00
|
|
|
const ImBuf *src;
|
|
|
|
|
ImBuf *dst;
|
2024-01-16 20:39:09 +01:00
|
|
|
eIMBTransformMode mode;
|
|
|
|
|
|
|
|
|
|
/* UV coordinates at the destination origin (0,0) in source image space. */
|
2024-01-16 13:32:47 +01:00
|
|
|
float2 start_uv;
|
2021-12-15 11:09:31 +01:00
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
/* Source UV step delta, when moving along one destination pixel in X axis. */
|
2024-01-16 13:32:47 +01:00
|
|
|
float2 add_x;
|
2021-12-15 11:09:31 +01:00
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
/* Source UV step delta, when moving along one destination pixel in Y axis. */
|
2024-01-16 13:32:47 +01:00
|
|
|
float2 add_y;
|
2021-12-15 11:09:31 +01:00
|
|
|
|
2024-02-02 16:28:51 +01:00
|
|
|
/* Source corners in destination pixel space, counter-clockwise. */
|
|
|
|
|
float2 src_corners[4];
|
|
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
IndexRange dst_region_x_range;
|
|
|
|
|
IndexRange dst_region_y_range;
|
2021-12-08 13:00:34 +01:00
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
/* Cropping region in source image pixel space. */
|
|
|
|
|
rctf src_crop;
|
2021-12-08 09:54:52 +01:00
|
|
|
|
2024-01-29 18:41:31 +01:00
|
|
|
void init(const float4x4 &transform_matrix, const bool has_source_crop)
|
2021-12-08 09:54:52 +01:00
|
|
|
{
|
2024-01-16 20:39:09 +01:00
|
|
|
start_uv = transform_matrix.location().xy();
|
2024-01-16 13:32:47 +01:00
|
|
|
add_x = transform_matrix.x_axis().xy();
|
|
|
|
|
add_y = transform_matrix.y_axis().xy();
|
2024-01-16 20:39:09 +01:00
|
|
|
init_destination_region(transform_matrix, has_source_crop);
|
2021-12-08 09:54:52 +01:00
|
|
|
}
|
2023-01-26 13:38:59 +01:00
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
private:
|
|
|
|
|
void init_destination_region(const float4x4 &transform_matrix, const bool has_source_crop)
|
2023-01-27 09:56:19 +01:00
|
|
|
{
|
2024-01-16 20:39:09 +01:00
|
|
|
if (!has_source_crop) {
|
|
|
|
|
dst_region_x_range = IndexRange(dst->x);
|
|
|
|
|
dst_region_y_range = IndexRange(dst->y);
|
2023-01-27 09:56:19 +01:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-27 21:44:59 +11:00
|
|
|
/* Transform the src_crop to the destination buffer with a margin. */
|
2023-01-27 09:56:19 +01:00
|
|
|
const int2 margin(2);
|
|
|
|
|
rcti rect;
|
|
|
|
|
BLI_rcti_init_minmax(&rect);
|
2023-02-06 21:25:45 +01:00
|
|
|
float4x4 inverse = math::invert(transform_matrix);
|
2024-02-02 16:28:51 +01:00
|
|
|
const int2 src_coords[4] = {int2(src_crop.xmin, src_crop.ymin),
|
|
|
|
|
int2(src_crop.xmax, src_crop.ymin),
|
|
|
|
|
int2(src_crop.xmax, src_crop.ymax),
|
|
|
|
|
int2(src_crop.xmin, src_crop.ymax)};
|
|
|
|
|
for (int i = 0; i < 4; i++) {
|
|
|
|
|
int2 src_co = src_coords[i];
|
|
|
|
|
float3 dst_co = math::transform_point(inverse, float3(src_co.x, src_co.y, 0.0f));
|
|
|
|
|
src_corners[i] = float2(dst_co.x, dst_co.y);
|
|
|
|
|
|
2023-01-27 09:56:19 +01:00
|
|
|
BLI_rcti_do_minmax_v(&rect, int2(dst_co) + margin);
|
|
|
|
|
BLI_rcti_do_minmax_v(&rect, int2(dst_co) - margin);
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-27 21:44:59 +11:00
|
|
|
/* Clamp rect to fit inside the image buffer. */
|
2023-01-27 09:56:19 +01:00
|
|
|
rcti dest_rect;
|
|
|
|
|
BLI_rcti_init(&dest_rect, 0, dst->x, 0, dst->y);
|
|
|
|
|
BLI_rcti_isect(&rect, &dest_rect, &rect);
|
2024-01-16 20:39:09 +01:00
|
|
|
dst_region_x_range = IndexRange(rect.xmin, BLI_rcti_size_x(&rect));
|
|
|
|
|
dst_region_y_range = IndexRange(rect.ymin, BLI_rcti_size_y(&rect));
|
2021-12-15 11:09:31 +01:00
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
/* Crop uv-coordinates that are outside the user data src_crop rect. */
|
|
|
|
|
static bool should_discard(const TransformContext &ctx, const float2 &uv)
|
|
|
|
|
{
|
|
|
|
|
return uv.x < ctx.src_crop.xmin || uv.x >= ctx.src_crop.xmax || uv.y < ctx.src_crop.ymin ||
|
|
|
|
|
uv.y >= ctx.src_crop.ymax;
|
|
|
|
|
}
|
2021-12-15 11:09:31 +01:00
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
template<typename T> static T *init_pixel_pointer(const ImBuf *image, int x, int y);
|
|
|
|
|
template<> uchar *init_pixel_pointer(const ImBuf *image, int x, int y)
|
|
|
|
|
{
|
|
|
|
|
return image->byte_buffer.data + (size_t(y) * image->x + x) * image->channels;
|
|
|
|
|
}
|
|
|
|
|
template<> float *init_pixel_pointer(const ImBuf *image, int x, int y)
|
|
|
|
|
{
|
|
|
|
|
return image->float_buffer.data + (size_t(y) * image->x + x) * image->channels;
|
|
|
|
|
}
|
2021-12-15 11:09:31 +01:00
|
|
|
|
2023-12-14 15:10:30 +01:00
|
|
|
static float wrap_uv(float value, int size)
|
|
|
|
|
{
|
|
|
|
|
int x = int(floorf(value));
|
|
|
|
|
if (UNLIKELY(x < 0 || x >= size)) {
|
|
|
|
|
x %= size;
|
2021-12-15 11:09:31 +01:00
|
|
|
if (x < 0) {
|
2023-12-14 15:10:30 +01:00
|
|
|
x += size;
|
2021-12-15 11:09:31 +01:00
|
|
|
}
|
|
|
|
|
}
|
2023-12-14 15:10:30 +01:00
|
|
|
return x;
|
|
|
|
|
}
|
2021-12-10 16:14:36 +01:00
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
/* Read a pixel from an image buffer, with filtering/wrapping parameters. */
|
|
|
|
|
template<eIMBInterpolationFilterMode Filter, typename T, int NumChannels, bool WrapUV>
|
|
|
|
|
static void sample_image(const ImBuf *source, float u, float v, T *r_sample)
|
|
|
|
|
{
|
|
|
|
|
if constexpr (WrapUV) {
|
|
|
|
|
u = wrap_uv(u, source->x);
|
|
|
|
|
v = wrap_uv(v, source->y);
|
|
|
|
|
}
|
2024-01-26 11:57:19 +01:00
|
|
|
/* Bilinear/cubic interpolation functions use `floor(uv)` and `floor(uv)+1`
|
2024-01-16 20:39:09 +01:00
|
|
|
* texels. For proper mapping between pixel and texel spaces, need to
|
2024-01-26 11:57:19 +01:00
|
|
|
* subtract 0.5. */
|
|
|
|
|
if constexpr (Filter != IMB_FILTER_NEAREST) {
|
2024-01-16 20:39:09 +01:00
|
|
|
u -= 0.5f;
|
|
|
|
|
v -= 0.5f;
|
|
|
|
|
}
|
|
|
|
|
if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<T, float> && NumChannels == 4) {
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
interpolate_bilinear_fl(source, r_sample, u, v);
|
2024-01-16 20:39:09 +01:00
|
|
|
}
|
|
|
|
|
else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v<T, uchar> && NumChannels == 4)
|
2023-01-26 13:38:59 +01:00
|
|
|
{
|
2024-03-21 13:22:10 +01:00
|
|
|
interpolate_nearest_border_byte(source, r_sample, u, v);
|
2023-01-26 13:38:59 +01:00
|
|
|
}
|
2024-01-16 20:39:09 +01:00
|
|
|
else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<T, uchar> && NumChannels == 4)
|
2023-01-26 13:38:59 +01:00
|
|
|
{
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
interpolate_bilinear_byte(source, r_sample, u, v);
|
2024-01-16 20:39:09 +01:00
|
|
|
}
|
|
|
|
|
else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<T, float>) {
|
|
|
|
|
if constexpr (WrapUV) {
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
math::interpolate_bilinear_wrap_fl(source->float_buffer.data,
|
2024-01-16 20:39:09 +01:00
|
|
|
r_sample,
|
|
|
|
|
source->x,
|
|
|
|
|
source->y,
|
|
|
|
|
NumChannels,
|
|
|
|
|
u,
|
|
|
|
|
v,
|
|
|
|
|
true,
|
|
|
|
|
true);
|
2023-01-26 15:17:37 +01:00
|
|
|
}
|
|
|
|
|
else {
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
math::interpolate_bilinear_fl(
|
2024-01-16 20:39:09 +01:00
|
|
|
source->float_buffer.data, r_sample, source->x, source->y, NumChannels, u, v);
|
2023-01-26 15:17:37 +01:00
|
|
|
}
|
2023-01-26 13:38:59 +01:00
|
|
|
}
|
2024-01-16 20:39:09 +01:00
|
|
|
else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v<T, float>) {
|
2024-03-21 13:22:10 +01:00
|
|
|
math::interpolate_nearest_border_fl(
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
source->float_buffer.data, r_sample, source->x, source->y, NumChannels, u, v);
|
2023-01-26 13:38:59 +01:00
|
|
|
}
|
2024-01-26 11:57:19 +01:00
|
|
|
else if constexpr (Filter == IMB_FILTER_CUBIC_BSPLINE && std::is_same_v<T, float>) {
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
math::interpolate_cubic_bspline_fl(
|
2024-01-16 20:39:09 +01:00
|
|
|
source->float_buffer.data, r_sample, source->x, source->y, NumChannels, u, v);
|
|
|
|
|
}
|
2024-01-26 11:57:19 +01:00
|
|
|
else if constexpr (Filter == IMB_FILTER_CUBIC_BSPLINE && std::is_same_v<T, uchar> &&
|
|
|
|
|
NumChannels == 4)
|
2023-01-26 13:38:59 +01:00
|
|
|
{
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
interpolate_cubic_bspline_byte(source, r_sample, u, v);
|
2021-12-08 09:54:52 +01:00
|
|
|
}
|
2024-01-26 11:57:19 +01:00
|
|
|
else if constexpr (Filter == IMB_FILTER_CUBIC_MITCHELL && std::is_same_v<T, float>) {
|
|
|
|
|
math::interpolate_cubic_mitchell_fl(
|
|
|
|
|
source->float_buffer.data, r_sample, source->x, source->y, NumChannels, u, v);
|
|
|
|
|
}
|
|
|
|
|
else if constexpr (Filter == IMB_FILTER_CUBIC_MITCHELL && std::is_same_v<T, uchar> &&
|
|
|
|
|
NumChannels == 4)
|
|
|
|
|
{
|
|
|
|
|
interpolate_cubic_mitchell_byte(source, r_sample, u, v);
|
|
|
|
|
}
|
2024-01-16 20:39:09 +01:00
|
|
|
else {
|
|
|
|
|
/* Unsupported sampler. */
|
|
|
|
|
BLI_assert_unreachable();
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-08 09:54:52 +01:00
|
|
|
|
VSE: Speedup Subsampled 3x3 image filter
Make Subsampling 3x3 filter twice faster (on 4K UHD resolution,
Windows/VS2022/Ryzen5950X: 52.7ms -> 28.3ms), by reformulating how it works:
Conceptually Subsampling filter is a box filter: it sums up N source image
pixels, computes their average and outputs the result. Critical thing is,
that should be done in premultiplied space so that colors from fully or
mostly transparent regions do not "override" opaque colors.
Previously, when operating on byte images, the code achieved this by always
working on byte values, doing "progressively smaller" lerp into byte color
result, taking care of premultiplication and again storing the "straight"
alpha for each sample being processed. This meant that for each sample, there
are 3 divisions involved! This also led to some precision loss, since for all
9 samples all the intermediate results would only be stored at byte precision.
Reformulate that by simply accumulating the premultiplied color as a float.
This gets rid of all divisions, except the last step when said float needs to
be written back into a byte color.
The unit test results have a tiny difference, since now it is arguably better
(as per above, previously it was having some precision loss).
Pull Request: https://projects.blender.org/blender/blender/pulls/117125
2024-01-17 10:26:50 +01:00
|
|
|
static void add_subsample(const float src[4], float dst[4])
|
2021-12-08 09:54:52 +01:00
|
|
|
{
|
VSE: Speedup Subsampled 3x3 image filter
Make Subsampling 3x3 filter twice faster (on 4K UHD resolution,
Windows/VS2022/Ryzen5950X: 52.7ms -> 28.3ms), by reformulating how it works:
Conceptually Subsampling filter is a box filter: it sums up N source image
pixels, computes their average and outputs the result. Critical thing is,
that should be done in premultiplied space so that colors from fully or
mostly transparent regions do not "override" opaque colors.
Previously, when operating on byte images, the code achieved this by always
working on byte values, doing "progressively smaller" lerp into byte color
result, taking care of premultiplication and again storing the "straight"
alpha for each sample being processed. This meant that for each sample, there
are 3 divisions involved! This also led to some precision loss, since for all
9 samples all the intermediate results would only be stored at byte precision.
Reformulate that by simply accumulating the premultiplied color as a float.
This gets rid of all divisions, except the last step when said float needs to
be written back into a byte color.
The unit test results have a tiny difference, since now it is arguably better
(as per above, previously it was having some precision loss).
Pull Request: https://projects.blender.org/blender/blender/pulls/117125
2024-01-17 10:26:50 +01:00
|
|
|
add_v4_v4(dst, src);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void add_subsample(const uchar src[4], float dst[4])
|
|
|
|
|
{
|
|
|
|
|
float premul[4];
|
|
|
|
|
straight_uchar_to_premul_float(premul, src);
|
|
|
|
|
add_v4_v4(dst, premul);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void store_premul_float_sample(const float sample[4], float dst[4])
|
|
|
|
|
{
|
|
|
|
|
copy_v4_v4(dst, sample);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void store_premul_float_sample(const float sample[4], uchar dst[4])
|
|
|
|
|
{
|
|
|
|
|
premul_float_to_straight_uchar(dst, sample);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<int SrcChannels> static void store_sample(const uchar *sample, uchar *dst)
|
|
|
|
|
{
|
|
|
|
|
BLI_STATIC_ASSERT(SrcChannels == 4, "Unsigned chars always have 4 channels.");
|
|
|
|
|
copy_v4_v4_uchar(dst, sample);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<int SrcChannels> static void store_sample(const float *sample, float *dst)
|
|
|
|
|
{
|
|
|
|
|
if constexpr (SrcChannels == 4) {
|
2024-01-16 20:39:09 +01:00
|
|
|
copy_v4_v4(dst, sample);
|
|
|
|
|
}
|
VSE: Speedup Subsampled 3x3 image filter
Make Subsampling 3x3 filter twice faster (on 4K UHD resolution,
Windows/VS2022/Ryzen5950X: 52.7ms -> 28.3ms), by reformulating how it works:
Conceptually Subsampling filter is a box filter: it sums up N source image
pixels, computes their average and outputs the result. Critical thing is,
that should be done in premultiplied space so that colors from fully or
mostly transparent regions do not "override" opaque colors.
Previously, when operating on byte images, the code achieved this by always
working on byte values, doing "progressively smaller" lerp into byte color
result, taking care of premultiplication and again storing the "straight"
alpha for each sample being processed. This meant that for each sample, there
are 3 divisions involved! This also led to some precision loss, since for all
9 samples all the intermediate results would only be stored at byte precision.
Reformulate that by simply accumulating the premultiplied color as a float.
This gets rid of all divisions, except the last step when said float needs to
be written back into a byte color.
The unit test results have a tiny difference, since now it is arguably better
(as per above, previously it was having some precision loss).
Pull Request: https://projects.blender.org/blender/blender/pulls/117125
2024-01-17 10:26:50 +01:00
|
|
|
else if constexpr (SrcChannels == 3) {
|
2024-01-16 20:39:09 +01:00
|
|
|
copy_v4_fl4(dst, sample[0], sample[1], sample[2], 1.0f);
|
|
|
|
|
}
|
VSE: Speedup Subsampled 3x3 image filter
Make Subsampling 3x3 filter twice faster (on 4K UHD resolution,
Windows/VS2022/Ryzen5950X: 52.7ms -> 28.3ms), by reformulating how it works:
Conceptually Subsampling filter is a box filter: it sums up N source image
pixels, computes their average and outputs the result. Critical thing is,
that should be done in premultiplied space so that colors from fully or
mostly transparent regions do not "override" opaque colors.
Previously, when operating on byte images, the code achieved this by always
working on byte values, doing "progressively smaller" lerp into byte color
result, taking care of premultiplication and again storing the "straight"
alpha for each sample being processed. This meant that for each sample, there
are 3 divisions involved! This also led to some precision loss, since for all
9 samples all the intermediate results would only be stored at byte precision.
Reformulate that by simply accumulating the premultiplied color as a float.
This gets rid of all divisions, except the last step when said float needs to
be written back into a byte color.
The unit test results have a tiny difference, since now it is arguably better
(as per above, previously it was having some precision loss).
Pull Request: https://projects.blender.org/blender/blender/pulls/117125
2024-01-17 10:26:50 +01:00
|
|
|
else if constexpr (SrcChannels == 2) {
|
2024-01-16 20:39:09 +01:00
|
|
|
copy_v4_fl4(dst, sample[0], sample[1], 0.0f, 1.0f);
|
|
|
|
|
}
|
VSE: Speedup Subsampled 3x3 image filter
Make Subsampling 3x3 filter twice faster (on 4K UHD resolution,
Windows/VS2022/Ryzen5950X: 52.7ms -> 28.3ms), by reformulating how it works:
Conceptually Subsampling filter is a box filter: it sums up N source image
pixels, computes their average and outputs the result. Critical thing is,
that should be done in premultiplied space so that colors from fully or
mostly transparent regions do not "override" opaque colors.
Previously, when operating on byte images, the code achieved this by always
working on byte values, doing "progressively smaller" lerp into byte color
result, taking care of premultiplication and again storing the "straight"
alpha for each sample being processed. This meant that for each sample, there
are 3 divisions involved! This also led to some precision loss, since for all
9 samples all the intermediate results would only be stored at byte precision.
Reformulate that by simply accumulating the premultiplied color as a float.
This gets rid of all divisions, except the last step when said float needs to
be written back into a byte color.
The unit test results have a tiny difference, since now it is arguably better
(as per above, previously it was having some precision loss).
Pull Request: https://projects.blender.org/blender/blender/pulls/117125
2024-01-17 10:26:50 +01:00
|
|
|
else if constexpr (SrcChannels == 1) {
|
2024-05-03 11:32:43 +10:00
|
|
|
/* NOTE: single channel sample is stored as grayscale. */
|
2024-01-16 20:39:09 +01:00
|
|
|
copy_v4_fl4(dst, sample[0], sample[0], sample[0], 1.0f);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
BLI_assert_unreachable();
|
|
|
|
|
}
|
2021-12-08 09:54:52 +01:00
|
|
|
}
|
|
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
/* Process a block of destination image scanlines. */
|
|
|
|
|
template<eIMBInterpolationFilterMode Filter,
|
|
|
|
|
typename T,
|
|
|
|
|
int SrcChannels,
|
|
|
|
|
bool CropSource,
|
|
|
|
|
bool WrapUV>
|
|
|
|
|
static void process_scanlines(const TransformContext &ctx, IndexRange y_range)
|
2021-12-15 11:09:31 +01:00
|
|
|
{
|
2024-01-29 18:41:31 +01:00
|
|
|
if constexpr (Filter == IMB_FILTER_BOX) {
|
2024-01-16 20:39:09 +01:00
|
|
|
|
2024-01-18 10:37:49 +11:00
|
|
|
/* Multiple samples per pixel: accumulate them pre-multiplied,
|
|
|
|
|
* divide by sample count and write out (un-pre-multiplying if writing out
|
2024-01-29 18:41:31 +01:00
|
|
|
* to byte image).
|
|
|
|
|
*
|
|
|
|
|
* Do a box filter: for each destination pixel, accumulate XxY samples from source,
|
|
|
|
|
* based on scaling factors (length of X/Y pixel steps). Use at least 2 samples
|
2024-02-02 16:28:51 +01:00
|
|
|
* along each direction, so that in case of rotation the image gets
|
|
|
|
|
* some anti-aliasing. Use at most 100 samples along each direction,
|
2024-01-29 18:41:31 +01:00
|
|
|
* just as some way of clamping possible upper cost. Scaling something down by more
|
|
|
|
|
* than 100x should rarely if ever happen, worst case they will get some aliasing.
|
|
|
|
|
*/
|
|
|
|
|
float2 uv_start = ctx.start_uv;
|
|
|
|
|
int sub_count_x = int(math::clamp(roundf(math::length(ctx.add_x)), 2.0f, 100.0f));
|
|
|
|
|
int sub_count_y = int(math::clamp(roundf(math::length(ctx.add_y)), 2.0f, 100.0f));
|
|
|
|
|
const float inv_count = 1.0f / (sub_count_x * sub_count_y);
|
|
|
|
|
const float2 sub_step_x = ctx.add_x / sub_count_x;
|
|
|
|
|
const float2 sub_step_y = ctx.add_y / sub_count_y;
|
|
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
for (int yi : y_range) {
|
|
|
|
|
T *output = init_pixel_pointer<T>(ctx.dst, ctx.dst_region_x_range.first(), yi);
|
|
|
|
|
float2 uv_row = uv_start + yi * ctx.add_y;
|
|
|
|
|
for (int xi : ctx.dst_region_x_range) {
|
2024-01-29 18:41:31 +01:00
|
|
|
const float2 uv = uv_row + xi * ctx.add_x;
|
VSE: Speedup Subsampled 3x3 image filter
Make Subsampling 3x3 filter twice faster (on 4K UHD resolution,
Windows/VS2022/Ryzen5950X: 52.7ms -> 28.3ms), by reformulating how it works:
Conceptually Subsampling filter is a box filter: it sums up N source image
pixels, computes their average and outputs the result. Critical thing is,
that should be done in premultiplied space so that colors from fully or
mostly transparent regions do not "override" opaque colors.
Previously, when operating on byte images, the code achieved this by always
working on byte values, doing "progressively smaller" lerp into byte color
result, taking care of premultiplication and again storing the "straight"
alpha for each sample being processed. This meant that for each sample, there
are 3 divisions involved! This also led to some precision loss, since for all
9 samples all the intermediate results would only be stored at byte precision.
Reformulate that by simply accumulating the premultiplied color as a float.
This gets rid of all divisions, except the last step when said float needs to
be written back into a byte color.
The unit test results have a tiny difference, since now it is arguably better
(as per above, previously it was having some precision loss).
Pull Request: https://projects.blender.org/blender/blender/pulls/117125
2024-01-17 10:26:50 +01:00
|
|
|
float sample[4] = {};
|
2024-01-16 20:39:09 +01:00
|
|
|
|
2024-01-29 18:41:31 +01:00
|
|
|
for (int sub_y = 0; sub_y < sub_count_y; sub_y++) {
|
|
|
|
|
for (int sub_x = 0; sub_x < sub_count_x; sub_x++) {
|
|
|
|
|
float2 delta = (sub_x + 0.5f) * sub_step_x + (sub_y + 0.5f) * sub_step_y;
|
|
|
|
|
float2 sub_uv = uv + delta;
|
|
|
|
|
if (!CropSource || !should_discard(ctx, sub_uv)) {
|
|
|
|
|
T sub_sample[4];
|
|
|
|
|
sample_image<eIMBInterpolationFilterMode::IMB_FILTER_NEAREST,
|
|
|
|
|
T,
|
|
|
|
|
SrcChannels,
|
|
|
|
|
WrapUV>(ctx.src, sub_uv.x, sub_uv.y, sub_sample);
|
|
|
|
|
add_subsample(sub_sample, sample);
|
|
|
|
|
}
|
2024-01-16 20:39:09 +01:00
|
|
|
}
|
|
|
|
|
}
|
2021-12-15 11:09:31 +01:00
|
|
|
|
VSE: Speedup Subsampled 3x3 image filter
Make Subsampling 3x3 filter twice faster (on 4K UHD resolution,
Windows/VS2022/Ryzen5950X: 52.7ms -> 28.3ms), by reformulating how it works:
Conceptually Subsampling filter is a box filter: it sums up N source image
pixels, computes their average and outputs the result. Critical thing is,
that should be done in premultiplied space so that colors from fully or
mostly transparent regions do not "override" opaque colors.
Previously, when operating on byte images, the code achieved this by always
working on byte values, doing "progressively smaller" lerp into byte color
result, taking care of premultiplication and again storing the "straight"
alpha for each sample being processed. This meant that for each sample, there
are 3 divisions involved! This also led to some precision loss, since for all
9 samples all the intermediate results would only be stored at byte precision.
Reformulate that by simply accumulating the premultiplied color as a float.
This gets rid of all divisions, except the last step when said float needs to
be written back into a byte color.
The unit test results have a tiny difference, since now it is arguably better
(as per above, previously it was having some precision loss).
Pull Request: https://projects.blender.org/blender/blender/pulls/117125
2024-01-17 10:26:50 +01:00
|
|
|
mul_v4_v4fl(sample, sample, inv_count);
|
|
|
|
|
store_premul_float_sample(sample, output);
|
|
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
output += 4;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-15 11:09:31 +01:00
|
|
|
}
|
2024-01-16 20:39:09 +01:00
|
|
|
else {
|
2024-01-29 18:41:31 +01:00
|
|
|
/* One sample per pixel. Note: sample at pixel center for proper filtering. */
|
|
|
|
|
float2 uv_start = ctx.start_uv + ctx.add_x * 0.5f + ctx.add_y * 0.5f;
|
2024-01-16 20:39:09 +01:00
|
|
|
for (int yi : y_range) {
|
|
|
|
|
T *output = init_pixel_pointer<T>(ctx.dst, ctx.dst_region_x_range.first(), yi);
|
|
|
|
|
float2 uv_row = uv_start + yi * ctx.add_y;
|
|
|
|
|
for (int xi : ctx.dst_region_x_range) {
|
|
|
|
|
float2 uv = uv_row + xi * ctx.add_x;
|
|
|
|
|
if (!CropSource || !should_discard(ctx, uv)) {
|
|
|
|
|
T sample[4];
|
|
|
|
|
sample_image<Filter, T, SrcChannels, WrapUV>(ctx.src, uv.x, uv.y, sample);
|
VSE: Speedup Subsampled 3x3 image filter
Make Subsampling 3x3 filter twice faster (on 4K UHD resolution,
Windows/VS2022/Ryzen5950X: 52.7ms -> 28.3ms), by reformulating how it works:
Conceptually Subsampling filter is a box filter: it sums up N source image
pixels, computes their average and outputs the result. Critical thing is,
that should be done in premultiplied space so that colors from fully or
mostly transparent regions do not "override" opaque colors.
Previously, when operating on byte images, the code achieved this by always
working on byte values, doing "progressively smaller" lerp into byte color
result, taking care of premultiplication and again storing the "straight"
alpha for each sample being processed. This meant that for each sample, there
are 3 divisions involved! This also led to some precision loss, since for all
9 samples all the intermediate results would only be stored at byte precision.
Reformulate that by simply accumulating the premultiplied color as a float.
This gets rid of all divisions, except the last step when said float needs to
be written back into a byte color.
The unit test results have a tiny difference, since now it is arguably better
(as per above, previously it was having some precision loss).
Pull Request: https://projects.blender.org/blender/blender/pulls/117125
2024-01-17 10:26:50 +01:00
|
|
|
store_sample<SrcChannels>(sample, output);
|
2024-01-16 20:39:09 +01:00
|
|
|
}
|
|
|
|
|
output += 4;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-15 11:09:31 +01:00
|
|
|
}
|
2024-01-16 20:39:09 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<eIMBInterpolationFilterMode Filter, typename T, int SrcChannels>
|
|
|
|
|
static void transform_scanlines(const TransformContext &ctx, IndexRange y_range)
|
|
|
|
|
{
|
|
|
|
|
switch (ctx.mode) {
|
|
|
|
|
case IMB_TRANSFORM_MODE_REGULAR:
|
|
|
|
|
process_scanlines<Filter, T, SrcChannels, false, false>(ctx, y_range);
|
|
|
|
|
break;
|
|
|
|
|
case IMB_TRANSFORM_MODE_CROP_SRC:
|
|
|
|
|
process_scanlines<Filter, T, SrcChannels, true, false>(ctx, y_range);
|
|
|
|
|
break;
|
|
|
|
|
case IMB_TRANSFORM_MODE_WRAP_REPEAT:
|
|
|
|
|
process_scanlines<Filter, T, SrcChannels, false, true>(ctx, y_range);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
BLI_assert_unreachable();
|
|
|
|
|
break;
|
2021-12-15 11:09:31 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<eIMBInterpolationFilterMode Filter>
|
2024-01-16 20:39:09 +01:00
|
|
|
static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y_range)
|
2021-12-08 09:54:52 +01:00
|
|
|
{
|
2024-01-16 20:39:09 +01:00
|
|
|
int channels = ctx.src->channels;
|
2024-02-02 16:28:51 +01:00
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
if (ctx.dst->float_buffer.data && ctx.src->float_buffer.data) {
|
2024-02-02 16:28:51 +01:00
|
|
|
/* Float pixels. */
|
2024-01-16 20:39:09 +01:00
|
|
|
if (channels == 4) {
|
|
|
|
|
transform_scanlines<Filter, float, 4>(ctx, y_range);
|
|
|
|
|
}
|
|
|
|
|
else if (channels == 3) {
|
|
|
|
|
transform_scanlines<Filter, float, 3>(ctx, y_range);
|
|
|
|
|
}
|
|
|
|
|
else if (channels == 2) {
|
|
|
|
|
transform_scanlines<Filter, float, 2>(ctx, y_range);
|
|
|
|
|
}
|
|
|
|
|
else if (channels == 1) {
|
|
|
|
|
transform_scanlines<Filter, float, 1>(ctx, y_range);
|
|
|
|
|
}
|
2021-12-08 13:00:34 +01:00
|
|
|
}
|
2024-02-02 16:28:51 +01:00
|
|
|
|
|
|
|
|
if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) {
|
|
|
|
|
/* Byte pixels. */
|
2024-01-16 20:39:09 +01:00
|
|
|
if (channels == 4) {
|
|
|
|
|
transform_scanlines<Filter, uchar, 4>(ctx, y_range);
|
|
|
|
|
}
|
2021-12-08 09:54:52 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-02 16:28:51 +01:00
|
|
|
/**
 * Approximate pixel coverage for an edge pixel.
 *
 * \param pos: Position on the edge line.
 * \param ipos: Integer pixel coordinate that \a pos falls into.
 * \param delta: Direction of the edge; only the signs of its components are used.
 * \param is_steep: True when the edge advances mainly along Y.
 * \return Coverage factor within 0..1.
 */
static float calc_coverage(float2 pos, int2 ipos, float2 delta, bool is_steep)
{
  /* Very approximate: just take difference from coordinate (x or y based on
   * steepness) to the integer coordinate. */
  const float distance = is_steep ? fabsf(ipos.x - pos.x) : fabsf(ipos.y - pos.y);
  /* Adjust based on directions of the edges. */
  const bool flip = is_steep ? (delta.y < 0) : (delta.x > 0);
  const float coverage = math::clamp(flip ? 1.0f - distance : distance, 0.0f, 1.0f);
  /* Resulting coverage is 0.5 .. 1.0 range, since we are only covering
   * half of the pixels that should be AA'd (the other half is outside the
   * quad and does not get rasterized). Square the coverage to get
   * more range, and it looks a bit nicer that way. */
  return coverage * coverage;
}
|
|
|
|
|
|
|
|
|
|
static void edge_aa(const TransformContext &ctx)
|
|
|
|
|
{
|
|
|
|
|
/* Rasterize along outer source edges into the destination image,
|
|
|
|
|
* reducing alpha based on pixel distance to the edge at each pixel.
|
|
|
|
|
* This is very approximate and not 100% correct "analytical AA",
|
|
|
|
|
* but simple to do and better than nothing. */
|
|
|
|
|
for (int line_idx = 0; line_idx < 4; line_idx++) {
|
|
|
|
|
float2 ptA = ctx.src_corners[line_idx];
|
|
|
|
|
float2 ptB = ctx.src_corners[(line_idx + 1) & 3];
|
|
|
|
|
float2 delta = ptB - ptA;
|
|
|
|
|
float2 abs_delta = math::abs(delta);
|
|
|
|
|
float length = math::max(abs_delta.x, abs_delta.y);
|
|
|
|
|
if (length < 1) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
bool is_steep = length == abs_delta.y;
|
|
|
|
|
|
|
|
|
|
/* It is very common to have non-rotated strips; check if edge line is
|
|
|
|
|
* horizontal or vertical and would not alter the coverage and can
|
|
|
|
|
* be skipped. */
|
|
|
|
|
constexpr float NO_ROTATION = 1.0e-6f;
|
|
|
|
|
constexpr float NO_AA_CONTRIB = 1.0e-2f;
|
|
|
|
|
if (is_steep) {
|
|
|
|
|
if ((abs_delta.x < NO_ROTATION) && (fabsf(ptA.x - roundf(ptA.x)) < NO_AA_CONTRIB)) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if ((abs_delta.y < NO_ROTATION) && (fabsf(ptA.y - roundf(ptA.y)) < NO_AA_CONTRIB)) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* DDA line raster: step one pixel along the longer direction. */
|
|
|
|
|
delta /= length;
|
|
|
|
|
if (ctx.dst->float_buffer.data != nullptr) {
|
|
|
|
|
/* Float pixels. */
|
|
|
|
|
float *dst = ctx.dst->float_buffer.data;
|
|
|
|
|
for (int i = 0; i < length; i++) {
|
|
|
|
|
float2 pos = ptA + i * delta;
|
|
|
|
|
int2 ipos = int2(pos);
|
|
|
|
|
if (ipos.x >= 0 && ipos.x < ctx.dst->x && ipos.y >= 0 && ipos.y < ctx.dst->y) {
|
|
|
|
|
float cov = calc_coverage(pos, ipos, delta, is_steep);
|
|
|
|
|
size_t idx = (size_t(ipos.y) * ctx.dst->x + ipos.x) * 4;
|
|
|
|
|
dst[idx + 0] *= cov;
|
|
|
|
|
dst[idx + 1] *= cov;
|
|
|
|
|
dst[idx + 2] *= cov;
|
|
|
|
|
dst[idx + 3] *= cov;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (ctx.dst->byte_buffer.data != nullptr) {
|
|
|
|
|
/* Byte pixels. */
|
|
|
|
|
uchar *dst = ctx.dst->byte_buffer.data;
|
|
|
|
|
for (int i = 0; i < length; i++) {
|
|
|
|
|
float2 pos = ptA + i * delta;
|
|
|
|
|
int2 ipos = int2(pos);
|
|
|
|
|
if (ipos.x >= 0 && ipos.x < ctx.dst->x && ipos.y >= 0 && ipos.y < ctx.dst->y) {
|
|
|
|
|
float cov = calc_coverage(pos, ipos, delta, is_steep);
|
|
|
|
|
size_t idx = (size_t(ipos.y) * ctx.dst->x + ipos.x) * 4;
|
|
|
|
|
dst[idx + 3] *= cov;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-08 09:54:52 +01:00
|
|
|
} // namespace blender::imbuf::transform
|
|
|
|
|
|
|
|
|
|
using namespace blender::imbuf::transform;
|
2024-01-16 20:39:09 +01:00
|
|
|
using namespace blender;
|
2021-12-08 09:54:52 +01:00
|
|
|
|
2023-06-03 08:36:28 +10:00
|
|
|
void IMB_transform(const ImBuf *src,
|
|
|
|
|
ImBuf *dst,
|
2021-12-08 09:54:52 +01:00
|
|
|
const eIMBTransformMode mode,
|
|
|
|
|
const eIMBInterpolationFilterMode filter,
|
|
|
|
|
const float transform_matrix[4][4],
|
2023-06-03 08:36:28 +10:00
|
|
|
const rctf *src_crop)
|
2021-12-08 09:54:52 +01:00
|
|
|
{
|
|
|
|
|
BLI_assert_msg(mode != IMB_TRANSFORM_MODE_CROP_SRC || src_crop != nullptr,
|
|
|
|
|
"No source crop rect given, but crop source is requested. Or source crop rect "
|
|
|
|
|
"was given, but crop source was not requested.");
|
2024-01-16 20:39:09 +01:00
|
|
|
BLI_assert_msg(dst->channels == 4, "Destination image must have 4 channels.");
|
2021-12-08 13:00:34 +01:00
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
TransformContext ctx;
|
|
|
|
|
ctx.src = src;
|
|
|
|
|
ctx.dst = dst;
|
|
|
|
|
ctx.mode = mode;
|
|
|
|
|
bool crop = mode == IMB_TRANSFORM_MODE_CROP_SRC;
|
|
|
|
|
if (crop) {
|
|
|
|
|
ctx.src_crop = *src_crop;
|
2021-12-08 09:54:52 +01:00
|
|
|
}
|
2024-01-29 18:41:31 +01:00
|
|
|
ctx.init(blender::float4x4(transform_matrix), crop);
|
2021-12-08 13:00:34 +01:00
|
|
|
|
2024-01-16 20:39:09 +01:00
|
|
|
threading::parallel_for(ctx.dst_region_y_range, 8, [&](IndexRange y_range) {
|
|
|
|
|
if (filter == IMB_FILTER_NEAREST) {
|
|
|
|
|
transform_scanlines_filter<IMB_FILTER_NEAREST>(ctx, y_range);
|
|
|
|
|
}
|
|
|
|
|
else if (filter == IMB_FILTER_BILINEAR) {
|
|
|
|
|
transform_scanlines_filter<IMB_FILTER_BILINEAR>(ctx, y_range);
|
|
|
|
|
}
|
2024-01-26 11:57:19 +01:00
|
|
|
else if (filter == IMB_FILTER_CUBIC_BSPLINE) {
|
|
|
|
|
transform_scanlines_filter<IMB_FILTER_CUBIC_BSPLINE>(ctx, y_range);
|
|
|
|
|
}
|
|
|
|
|
else if (filter == IMB_FILTER_CUBIC_MITCHELL) {
|
|
|
|
|
transform_scanlines_filter<IMB_FILTER_CUBIC_MITCHELL>(ctx, y_range);
|
2024-01-16 20:39:09 +01:00
|
|
|
}
|
2024-01-29 18:41:31 +01:00
|
|
|
else if (filter == IMB_FILTER_BOX) {
|
|
|
|
|
transform_scanlines_filter<IMB_FILTER_BOX>(ctx, y_range);
|
|
|
|
|
}
|
2024-01-16 20:39:09 +01:00
|
|
|
});
|
2024-02-02 16:28:51 +01:00
|
|
|
|
|
|
|
|
if (crop && (filter != IMB_FILTER_NEAREST)) {
|
|
|
|
|
edge_aa(ctx);
|
|
|
|
|
}
|
2021-12-08 09:54:52 +01:00
|
|
|
}
|