2024-01-15 16:38:41 +01:00
|
|
|
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
2023-05-31 16:19:06 +02:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
2012-11-10 19:11:25 +00:00
|
|
|
|
2019-02-18 08:08:12 +11:00
|
|
|
/** \file
|
|
|
|
|
* \ingroup bli
|
2013-09-12 03:02:50 +00:00
|
|
|
*/
|
|
|
|
|
|
2024-01-19 12:08:40 -05:00
|
|
|
#include <cmath>
|
|
|
|
|
#include <cstring>
|
2012-11-10 19:11:25 +00:00
|
|
|
|
Cleanup: reduce amount of math-related includes
Using ClangBuildAnalyzer on the whole Blender build, it was pointing
out that BLI_math.h is the heaviest "header hub" (i.e. non tiny file
that is included a lot).
However, there's very little (actually zero) source files in Blender
that need "all the math" (base, colors, vectors, matrices,
quaternions, intersection, interpolation, statistics, solvers and
time). A common use case is source files needing just vectors, or
just vectors & matrices, or just colors etc. Actually, 181 files
were including the whole math thing without needing it at all.
This change removes BLI_math.h completely, and instead in all the
places that need it, includes BLI_math_vector.h or BLI_math_color.h
and so on.
Change from that:
- BLI_math_color.h was included 1399 times -> now 408 (took 114.0sec
to parse -> now 36.3sec)
- BLI_simd.h 1403 -> 418 (109.7sec -> 34.9sec).
Full rebuild of Blender (Apple M1, Xcode, RelWithDebInfo) is not
affected much (342sec -> 334sec). Most of benefit would be when
someone's changing BLI_simd.h or BLI_math_color.h or similar files,
that now there's 3x fewer files result in a recompile.
Pull Request #110944
2023-08-09 11:39:20 +03:00
|
|
|
#include "BLI_math_base.h"
|
2024-01-26 11:57:19 +01:00
|
|
|
#include "BLI_math_base.hh"
|
2024-01-15 16:38:41 +01:00
|
|
|
#include "BLI_math_interp.hh"
|
Cleanup: reduce amount of math-related includes
Using ClangBuildAnalyzer on the whole Blender build, it was pointing
out that BLI_math.h is the heaviest "header hub" (i.e. non tiny file
that is included a lot).
However, there's very little (actually zero) source files in Blender
that need "all the math" (base, colors, vectors, matrices,
quaternions, intersection, interpolation, statistics, solvers and
time). A common use case is source files needing just vectors, or
just vectors & matrices, or just colors etc. Actually, 181 files
were including the whole math thing without needing it at all.
This change removes BLI_math.h completely, and instead in all the
places that need it, includes BLI_math_vector.h or BLI_math_color.h
and so on.
Change from that:
- BLI_math_color.h was included 1399 times -> now 408 (took 114.0sec
to parse -> now 36.3sec)
- BLI_simd.h 1403 -> 418 (109.7sec -> 34.9sec).
Full rebuild of Blender (Apple M1, Xcode, RelWithDebInfo) is not
affected much (342sec -> 334sec). Most of benefit would be when
someone's changing BLI_simd.h or BLI_math_color.h or similar files,
that now there's 3x fewer files result in a recompile.
Pull Request #110944
2023-08-09 11:39:20 +03:00
|
|
|
#include "BLI_math_vector.h"
|
2024-01-15 16:38:41 +01:00
|
|
|
#include "BLI_math_vector_types.hh"
|
2024-05-02 16:22:19 +02:00
|
|
|
#include "BLI_simd.hh"
|
2024-02-14 13:40:31 +11:00
|
|
|
|
|
|
|
|
#include "BLI_strict_flags.h" /* Keep last. */
|
2013-12-06 03:46:27 +11:00
|
|
|
|
2024-02-02 16:28:51 +01:00
|
|
|
namespace blender::math {
|
|
|
|
|
|
2024-01-26 11:57:19 +01:00
|
|
|
/** Selects which cubic reconstruction kernel the bicubic sampling code evaluates. */
enum class eCubicFilter {
  /** Cubic B-Spline (Mitchell-Netravali filter with B=1, C=0 parameters). */
  BSpline,
  /** Cubic Mitchell-Netravali filter with B=1/3, C=1/3 parameters. */
  Mitchell,
};
|
|
|
|
|
|
|
|
|
|
/* Calculate cubic filter coefficients, for samples at -1,0,+1,+2.
|
|
|
|
|
* f is 0..1 offset from texel center in pixel space. */
|
2024-02-02 16:28:51 +01:00
|
|
|
template<enum eCubicFilter filter> static float4 cubic_filter_coefficients(float f)
|
2012-11-10 19:11:25 +00:00
|
|
|
{
|
2024-01-15 16:38:41 +01:00
|
|
|
float f2 = f * f;
|
|
|
|
|
float f3 = f2 * f;
|
|
|
|
|
|
2024-01-26 11:57:19 +01:00
|
|
|
if constexpr (filter == eCubicFilter::BSpline) {
|
|
|
|
|
/* Cubic B-Spline (Mitchell-Netravali filter with B=1, C=0 parameters). */
|
|
|
|
|
float w3 = f3 * (1.0f / 6.0f);
|
|
|
|
|
float w0 = -w3 + f2 * 0.5f - f * 0.5f + 1.0f / 6.0f;
|
|
|
|
|
float w1 = f3 * 0.5f - f2 * 1.0f + 2.0f / 3.0f;
|
|
|
|
|
float w2 = 1.0f - w0 - w1 - w3;
|
2024-02-02 16:28:51 +01:00
|
|
|
return float4(w0, w1, w2, w3);
|
2024-01-26 11:57:19 +01:00
|
|
|
}
|
|
|
|
|
else if constexpr (filter == eCubicFilter::Mitchell) {
|
|
|
|
|
/* Cubic Mitchell-Netravali filter with B=1/3, C=1/3 parameters. */
|
|
|
|
|
float w0 = -7.0f / 18.0f * f3 + 5.0f / 6.0f * f2 - 0.5f * f + 1.0f / 18.0f;
|
|
|
|
|
float w1 = 7.0f / 6.0f * f3 - 2.0f * f2 + 8.0f / 9.0f;
|
|
|
|
|
float w2 = -7.0f / 6.0f * f3 + 3.0f / 2.0f * f2 + 0.5f * f + 1.0f / 18.0f;
|
|
|
|
|
float w3 = 7.0f / 18.0f * f3 - 1.0f / 3.0f * f2;
|
2024-02-02 16:28:51 +01:00
|
|
|
return float4(w0, w1, w2, w3);
|
2024-01-26 11:57:19 +01:00
|
|
|
}
|
2012-11-10 19:11:25 +00:00
|
|
|
}
|
|
|
|
|
|
2024-04-15 15:21:58 +02:00
|
|
|
#if BLI_HAVE_SSE4
|
2024-01-26 11:57:19 +01:00
|
|
|
template<eCubicFilter filter>
|
2024-01-15 16:38:41 +01:00
|
|
|
BLI_INLINE void bicubic_interpolation_uchar_simd(
|
|
|
|
|
const uchar *src_buffer, uchar *output, int width, int height, float u, float v)
|
2012-11-11 08:48:35 +00:00
|
|
|
{
|
2024-01-15 16:38:41 +01:00
|
|
|
__m128 uv = _mm_set_ps(0, 0, v, u);
|
2024-04-15 15:21:58 +02:00
|
|
|
__m128 uv_floor = _mm_floor_ps(uv);
|
2024-01-15 16:38:41 +01:00
|
|
|
__m128i i_uv = _mm_cvttps_epi32(uv_floor);
|
|
|
|
|
|
|
|
|
|
/* Sample area entirely outside image?
|
|
|
|
|
* We check if any of (iu+1, iv+1, width, height) < (0, 0, iu+1, iv+1). */
|
|
|
|
|
__m128i i_uv_1 = _mm_add_epi32(i_uv, _mm_set_epi32(0, 0, 1, 1));
|
|
|
|
|
__m128i cmp_a = _mm_or_si128(i_uv_1, _mm_set_epi32(height, width, 0, 0));
|
|
|
|
|
__m128i cmp_b = _mm_shuffle_epi32(i_uv_1, _MM_SHUFFLE(1, 0, 3, 2));
|
|
|
|
|
__m128i invalid = _mm_cmplt_epi32(cmp_a, cmp_b);
|
|
|
|
|
if (_mm_movemask_ps(_mm_castsi128_ps(invalid)) != 0) {
|
|
|
|
|
memset(output, 0, 4);
|
|
|
|
|
return;
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
2024-01-15 16:38:41 +01:00
|
|
|
|
|
|
|
|
__m128 frac_uv = _mm_sub_ps(uv, uv_floor);
|
|
|
|
|
|
|
|
|
|
/* Calculate pixel weights. */
|
2024-02-02 16:28:51 +01:00
|
|
|
float4 wx = cubic_filter_coefficients<filter>(_mm_cvtss_f32(frac_uv));
|
|
|
|
|
float4 wy = cubic_filter_coefficients<filter>(
|
2024-01-15 16:38:41 +01:00
|
|
|
_mm_cvtss_f32(_mm_shuffle_ps(frac_uv, frac_uv, 1)));
|
|
|
|
|
|
|
|
|
|
/* Read 4x4 source pixels and blend them. */
|
|
|
|
|
__m128 out = _mm_setzero_ps();
|
|
|
|
|
int iu = _mm_cvtsi128_si32(i_uv);
|
|
|
|
|
int iv = _mm_cvtsi128_si32(_mm_shuffle_epi32(i_uv, 1));
|
|
|
|
|
|
|
|
|
|
for (int n = 0; n < 4; n++) {
|
|
|
|
|
int y1 = iv + n - 1;
|
|
|
|
|
CLAMP(y1, 0, height - 1);
|
|
|
|
|
for (int m = 0; m < 4; m++) {
|
|
|
|
|
|
|
|
|
|
int x1 = iu + m - 1;
|
|
|
|
|
CLAMP(x1, 0, width - 1);
|
|
|
|
|
float w = wx[m] * wy[n];
|
|
|
|
|
|
|
|
|
|
const uchar *data = src_buffer + (width * y1 + x1) * 4;
|
|
|
|
|
/* Load 4 bytes and expand into 4-lane SIMD. */
|
|
|
|
|
__m128i sample_i = _mm_castps_si128(_mm_load_ss((const float *)data));
|
|
|
|
|
sample_i = _mm_unpacklo_epi8(sample_i, _mm_setzero_si128());
|
|
|
|
|
sample_i = _mm_unpacklo_epi16(sample_i, _mm_setzero_si128());
|
|
|
|
|
|
|
|
|
|
/* Accumulate into out with weight. */
|
|
|
|
|
out = _mm_add_ps(out, _mm_mul_ps(_mm_cvtepi32_ps(sample_i), _mm_set1_ps(w)));
|
|
|
|
|
}
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
2024-01-15 16:38:41 +01:00
|
|
|
|
|
|
|
|
/* Pack and write to destination: pack to 16 bit signed, then to 8 bit
|
2024-01-26 11:57:19 +01:00
|
|
|
* unsigned, then write resulting 32-bit value. This will clamp
|
|
|
|
|
* out of range values too. */
|
2024-01-15 16:38:41 +01:00
|
|
|
out = _mm_add_ps(out, _mm_set1_ps(0.5f));
|
|
|
|
|
__m128i rgba32 = _mm_cvttps_epi32(out);
|
|
|
|
|
__m128i rgba16 = _mm_packs_epi32(rgba32, _mm_setzero_si128());
|
|
|
|
|
__m128i rgba8 = _mm_packus_epi16(rgba16, _mm_setzero_si128());
|
|
|
|
|
_mm_store_ss((float *)output, _mm_castsi128_ps(rgba8));
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
2024-04-15 15:21:58 +02:00
|
|
|
#endif /* BLI_HAVE_SSE4 */
|
2012-11-11 08:48:35 +00:00
|
|
|
|
2024-01-26 11:57:19 +01:00
|
|
|
template<typename T, eCubicFilter filter>
|
2024-01-15 16:38:41 +01:00
|
|
|
static void bicubic_interpolation(
|
|
|
|
|
const T *src_buffer, T *output, int width, int height, int components, float u, float v)
|
2012-11-10 19:11:25 +00:00
|
|
|
{
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
BLI_assert(src_buffer && output);
|
|
|
|
|
|
2024-04-15 15:21:58 +02:00
|
|
|
#if BLI_HAVE_SSE4
|
2024-01-15 16:38:41 +01:00
|
|
|
if constexpr (std::is_same_v<T, uchar>) {
|
|
|
|
|
if (components == 4) {
|
2024-01-26 11:57:19 +01:00
|
|
|
bicubic_interpolation_uchar_simd<filter>(src_buffer, output, width, height, u, v);
|
2024-01-15 16:38:41 +01:00
|
|
|
return;
|
2015-02-24 00:36:33 +05:00
|
|
|
}
|
2012-11-10 19:11:25 +00:00
|
|
|
}
|
2024-01-15 16:38:41 +01:00
|
|
|
#endif
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-02-02 10:43:18 +11:00
|
|
|
int iu = int(floor(u));
|
|
|
|
|
int iv = int(floor(v));
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-01-15 16:38:41 +01:00
|
|
|
/* Sample area entirely outside image? */
|
|
|
|
|
if (iu + 1 < 0 || iu > width - 1 || iv + 1 < 0 || iv > height - 1) {
|
|
|
|
|
memset(output, 0, size_t(components) * sizeof(T));
|
|
|
|
|
return;
|
|
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-02-02 10:43:18 +11:00
|
|
|
float frac_u = u - float(iu);
|
|
|
|
|
float frac_v = v - float(iv);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-01-15 16:38:41 +01:00
|
|
|
float4 out{0.0f};
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-01-15 16:38:41 +01:00
|
|
|
/* Calculate pixel weights. */
|
2024-01-26 11:57:19 +01:00
|
|
|
float4 wx = cubic_filter_coefficients<filter>(frac_u);
|
|
|
|
|
float4 wy = cubic_filter_coefficients<filter>(frac_v);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-01-15 16:38:41 +01:00
|
|
|
/* Read 4x4 source pixels and blend them. */
|
|
|
|
|
for (int n = 0; n < 4; n++) {
|
|
|
|
|
int y1 = iv + n - 1;
|
|
|
|
|
CLAMP(y1, 0, height - 1);
|
|
|
|
|
for (int m = 0; m < 4; m++) {
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-01-15 16:38:41 +01:00
|
|
|
int x1 = iu + m - 1;
|
|
|
|
|
CLAMP(x1, 0, width - 1);
|
|
|
|
|
float w = wx[m] * wy[n];
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-01-15 16:38:41 +01:00
|
|
|
const T *data = src_buffer + (width * y1 + x1) * components;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2012-11-11 08:48:35 +00:00
|
|
|
if (components == 1) {
|
2012-11-10 19:11:25 +00:00
|
|
|
out[0] += data[0] * w;
|
|
|
|
|
}
|
|
|
|
|
else if (components == 3) {
|
|
|
|
|
out[0] += data[0] * w;
|
|
|
|
|
out[1] += data[1] * w;
|
|
|
|
|
out[2] += data[2] * w;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
out[0] += data[0] * w;
|
|
|
|
|
out[1] += data[1] * w;
|
|
|
|
|
out[2] += data[2] * w;
|
|
|
|
|
out[3] += data[3] * w;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-01-26 11:57:19 +01:00
|
|
|
/* Mitchell filter has negative lobes; prevent output from going out of range. */
|
|
|
|
|
if constexpr (filter == eCubicFilter::Mitchell) {
|
|
|
|
|
for (int i = 0; i < components; i++) {
|
|
|
|
|
out[i] = math::max(out[i], 0.0f);
|
|
|
|
|
if constexpr (std::is_same_v<T, uchar>) {
|
|
|
|
|
out[i] = math::min(out[i], 255.0f);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-15 16:38:41 +01:00
|
|
|
/* Write result. */
|
|
|
|
|
if constexpr (std::is_same_v<T, float>) {
|
2012-11-11 08:48:35 +00:00
|
|
|
if (components == 1) {
|
2024-01-15 16:38:41 +01:00
|
|
|
output[0] = out[0];
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
|
|
|
|
else if (components == 3) {
|
2024-01-15 16:38:41 +01:00
|
|
|
copy_v3_v3(output, out);
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
|
|
|
|
else {
|
2024-01-15 16:38:41 +01:00
|
|
|
copy_v4_v4(output, out);
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
2012-11-10 19:11:25 +00:00
|
|
|
}
|
|
|
|
|
else {
|
2012-11-11 08:48:35 +00:00
|
|
|
if (components == 1) {
|
2024-01-19 11:52:10 +11:00
|
|
|
output[0] = uchar(out[0] + 0.5f);
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
|
|
|
|
else if (components == 3) {
|
2024-01-19 11:52:10 +11:00
|
|
|
output[0] = uchar(out[0] + 0.5f);
|
|
|
|
|
output[1] = uchar(out[1] + 0.5f);
|
|
|
|
|
output[2] = uchar(out[2] + 0.5f);
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
|
|
|
|
else {
|
2024-01-19 11:52:10 +11:00
|
|
|
output[0] = uchar(out[0] + 0.5f);
|
|
|
|
|
output[1] = uchar(out[1] + 0.5f);
|
|
|
|
|
output[2] = uchar(out[2] + 0.5f);
|
|
|
|
|
output[3] = uchar(out[3] + 0.5f);
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
2012-11-10 19:11:25 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-02 16:28:51 +01:00
|
|
|
template<bool border>
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
BLI_INLINE void bilinear_fl_impl(const float *buffer,
|
|
|
|
|
float *output,
|
|
|
|
|
int width,
|
|
|
|
|
int height,
|
|
|
|
|
int components,
|
|
|
|
|
float u,
|
|
|
|
|
float v,
|
|
|
|
|
bool wrap_x = false,
|
|
|
|
|
bool wrap_y = false)
|
2012-11-10 19:11:25 +00:00
|
|
|
{
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
BLI_assert(buffer && output);
|
2012-11-10 19:11:25 +00:00
|
|
|
float a, b;
|
|
|
|
|
float a_b, ma_b, a_mb, ma_mb;
|
|
|
|
|
int y1, y2, x1, x2;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
if (wrap_x) {
|
|
|
|
|
u = floored_fmod(u, float(width));
|
|
|
|
|
}
|
|
|
|
|
if (wrap_y) {
|
|
|
|
|
v = floored_fmod(v, float(height));
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-14 15:10:30 +01:00
|
|
|
float uf = floorf(u);
|
|
|
|
|
float vf = floorf(v);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-02-02 10:43:18 +11:00
|
|
|
x1 = int(uf);
|
2023-12-14 15:10:30 +01:00
|
|
|
x2 = x1 + 1;
|
2024-01-19 11:52:10 +11:00
|
|
|
y1 = int(vf);
|
2023-12-14 15:10:30 +01:00
|
|
|
y2 = y1 + 1;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2023-12-14 15:10:30 +01:00
|
|
|
const float *row1, *row2, *row3, *row4;
|
|
|
|
|
const float empty[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-07-14 18:55:43 +10:00
|
|
|
/* Check if +1 samples need wrapping, or we don't do wrapping then if
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
* we are sampling completely outside the image. */
|
2023-12-14 15:10:30 +01:00
|
|
|
if (wrap_x) {
|
|
|
|
|
if (x2 >= width) {
|
|
|
|
|
x2 = 0;
|
2016-07-08 02:14:54 +10:00
|
|
|
}
|
2023-12-14 15:10:30 +01:00
|
|
|
}
|
2024-03-07 15:34:42 +01:00
|
|
|
else if (border && (x2 < 0 || x1 >= width)) {
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
copy_vn_fl(output, components, 0.0f);
|
2023-12-14 15:10:30 +01:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (wrap_y) {
|
|
|
|
|
if (y2 >= height) {
|
|
|
|
|
y2 = 0;
|
2016-07-08 02:14:54 +10:00
|
|
|
}
|
2023-12-14 15:10:30 +01:00
|
|
|
}
|
2024-03-07 15:34:42 +01:00
|
|
|
else if (border && (y2 < 0 || y1 >= height)) {
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
copy_vn_fl(output, components, 0.0f);
|
2023-12-14 15:10:30 +01:00
|
|
|
return;
|
|
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-02-02 16:28:51 +01:00
|
|
|
/* Sample locations. */
|
|
|
|
|
if constexpr (border) {
|
|
|
|
|
row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * components;
|
|
|
|
|
row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * components;
|
|
|
|
|
row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * components;
|
|
|
|
|
row4 = (x2 > width - 1 || y2 > height - 1) ? empty :
|
|
|
|
|
buffer + (int64_t(width) * y2 + x2) * components;
|
2023-12-14 15:10:30 +01:00
|
|
|
}
|
|
|
|
|
else {
|
2024-02-02 16:28:51 +01:00
|
|
|
x1 = blender::math::clamp(x1, 0, width - 1);
|
|
|
|
|
x2 = blender::math::clamp(x2, 0, width - 1);
|
|
|
|
|
y1 = blender::math::clamp(y1, 0, height - 1);
|
|
|
|
|
y2 = blender::math::clamp(y2, 0, height - 1);
|
|
|
|
|
row1 = buffer + (int64_t(width) * y1 + x1) * components;
|
|
|
|
|
row2 = buffer + (int64_t(width) * y2 + x1) * components;
|
|
|
|
|
row3 = buffer + (int64_t(width) * y1 + x2) * components;
|
|
|
|
|
row4 = buffer + (int64_t(width) * y2 + x2) * components;
|
2023-12-14 15:10:30 +01:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2023-12-14 15:10:30 +01:00
|
|
|
a = u - uf;
|
|
|
|
|
b = v - vf;
|
|
|
|
|
a_b = a * b;
|
|
|
|
|
ma_b = (1.0f - a) * b;
|
|
|
|
|
a_mb = a * (1.0f - b);
|
|
|
|
|
ma_mb = (1.0f - a) * (1.0f - b);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2023-12-14 15:10:30 +01:00
|
|
|
if (components == 1) {
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
|
2023-12-14 15:10:30 +01:00
|
|
|
}
|
|
|
|
|
else if (components == 3) {
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
|
|
|
|
|
output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
|
|
|
|
|
output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
2012-11-10 19:11:25 +00:00
|
|
|
else {
|
2023-12-14 15:10:30 +01:00
|
|
|
#if BLI_HAVE_SSE2
|
|
|
|
|
__m128 rgba1 = _mm_loadu_ps(row1);
|
|
|
|
|
__m128 rgba2 = _mm_loadu_ps(row2);
|
|
|
|
|
__m128 rgba3 = _mm_loadu_ps(row3);
|
|
|
|
|
__m128 rgba4 = _mm_loadu_ps(row4);
|
|
|
|
|
rgba1 = _mm_mul_ps(_mm_set1_ps(ma_mb), rgba1);
|
|
|
|
|
rgba2 = _mm_mul_ps(_mm_set1_ps(ma_b), rgba2);
|
|
|
|
|
rgba3 = _mm_mul_ps(_mm_set1_ps(a_mb), rgba3);
|
|
|
|
|
rgba4 = _mm_mul_ps(_mm_set1_ps(a_b), rgba4);
|
|
|
|
|
__m128 rgba13 = _mm_add_ps(rgba1, rgba3);
|
|
|
|
|
__m128 rgba24 = _mm_add_ps(rgba2, rgba4);
|
|
|
|
|
__m128 rgba = _mm_add_ps(rgba13, rgba24);
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
_mm_storeu_ps(output, rgba);
|
2023-12-14 15:10:30 +01:00
|
|
|
#else
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
|
|
|
|
|
output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
|
|
|
|
|
output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
|
|
|
|
|
output[3] = ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3];
|
2023-12-14 15:10:30 +01:00
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-02-02 16:28:51 +01:00
|
|
|
template<bool border>
|
|
|
|
|
BLI_INLINE uchar4 bilinear_byte_impl(const uchar *buffer, int width, int height, float u, float v)
|
2023-12-14 15:10:30 +01:00
|
|
|
{
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
BLI_assert(buffer);
|
|
|
|
|
uchar4 res;
|
|
|
|
|
|
2024-04-15 15:21:58 +02:00
|
|
|
#if BLI_HAVE_SSE4
|
2023-12-14 15:10:30 +01:00
|
|
|
__m128 uvuv = _mm_set_ps(v, u, v, u);
|
2024-04-15 15:21:58 +02:00
|
|
|
__m128 uvuv_floor = _mm_floor_ps(uvuv);
|
2023-12-14 15:10:30 +01:00
|
|
|
|
|
|
|
|
/* x1, y1, x2, y2 */
|
|
|
|
|
__m128i xy12 = _mm_add_epi32(_mm_cvttps_epi32(uvuv_floor), _mm_set_epi32(1, 1, 0, 0));
|
|
|
|
|
/* Check whether any of the coordinates are outside of the image. */
|
|
|
|
|
__m128i size_minus_1 = _mm_sub_epi32(_mm_set_epi32(height, width, height, width),
|
|
|
|
|
_mm_set1_epi32(1));
|
2024-02-02 16:28:51 +01:00
|
|
|
|
|
|
|
|
/* Samples 1,2,3,4 will be in this order: x1y1, x1y2, x2y1, x2y2. */
|
|
|
|
|
__m128i x1234, y1234, invalid_1234;
|
|
|
|
|
|
|
|
|
|
if constexpr (border) {
|
|
|
|
|
/* Blend black colors for samples right outside the image: figure out
|
|
|
|
|
* which of the 4 samples were outside, set their coordinates to zero
|
|
|
|
|
* and later on put black color into their place. */
|
|
|
|
|
__m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
|
|
|
|
|
__m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
|
|
|
|
|
__m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);
|
|
|
|
|
|
|
|
|
|
/* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
|
|
|
|
|
x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
|
|
|
|
|
y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
|
|
|
|
|
invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
|
|
|
|
|
_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
|
|
|
|
|
/* Set x & y to zero for invalid samples. */
|
|
|
|
|
x1234 = _mm_andnot_si128(invalid_1234, x1234);
|
|
|
|
|
y1234 = _mm_andnot_si128(invalid_1234, y1234);
|
|
|
|
|
}
|
|
|
|
|
else {
|
2024-03-08 07:50:01 +01:00
|
|
|
/* Clamp samples to image edges. */
|
2024-04-15 15:21:58 +02:00
|
|
|
__m128i xy12_clamped = _mm_max_epi32(xy12, _mm_setzero_si128());
|
|
|
|
|
xy12_clamped = _mm_min_epi32(xy12_clamped, size_minus_1);
|
2024-02-02 16:28:51 +01:00
|
|
|
x1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(2, 2, 0, 0));
|
|
|
|
|
y1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(3, 1, 3, 1));
|
|
|
|
|
}
|
2023-12-14 15:10:30 +01:00
|
|
|
|
|
|
|
|
/* Read the four sample values. Do address calculations in C, since SSE
|
|
|
|
|
* before 4.1 makes it very cumbersome to do full integer multiplies. */
|
|
|
|
|
int xcoord[4];
|
|
|
|
|
int ycoord[4];
|
|
|
|
|
_mm_storeu_ps((float *)xcoord, _mm_castsi128_ps(x1234));
|
|
|
|
|
_mm_storeu_ps((float *)ycoord, _mm_castsi128_ps(y1234));
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
int sample1 = ((const int *)buffer)[ycoord[0] * int64_t(width) + xcoord[0]];
|
2024-01-19 11:52:10 +11:00
|
|
|
int sample2 = ((const int *)buffer)[ycoord[1] * int64_t(width) + xcoord[1]];
|
|
|
|
|
int sample3 = ((const int *)buffer)[ycoord[2] * int64_t(width) + xcoord[2]];
|
|
|
|
|
int sample4 = ((const int *)buffer)[ycoord[3] * int64_t(width) + xcoord[3]];
|
2023-12-14 15:10:30 +01:00
|
|
|
__m128i samples1234 = _mm_set_epi32(sample4, sample3, sample2, sample1);
|
2024-02-02 16:28:51 +01:00
|
|
|
if constexpr (border) {
|
|
|
|
|
/* Set samples to black for the ones that were actually invalid. */
|
|
|
|
|
samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
|
|
|
|
|
}
|
2023-12-14 15:10:30 +01:00
|
|
|
|
|
|
|
|
/* Expand samples from packed 8-bit RGBA to full floats:
|
|
|
|
|
* spread to 16 bit values. */
|
|
|
|
|
__m128i rgba16_12 = _mm_unpacklo_epi8(samples1234, _mm_setzero_si128());
|
|
|
|
|
__m128i rgba16_34 = _mm_unpackhi_epi8(samples1234, _mm_setzero_si128());
|
|
|
|
|
/* Spread to 32 bit values and convert to float. */
|
|
|
|
|
__m128 rgba1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_12, _mm_setzero_si128()));
|
|
|
|
|
__m128 rgba2 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_12, _mm_setzero_si128()));
|
|
|
|
|
__m128 rgba3 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_34, _mm_setzero_si128()));
|
|
|
|
|
__m128 rgba4 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_34, _mm_setzero_si128()));
|
|
|
|
|
|
|
|
|
|
/* Calculate interpolation factors: (1-a)*(1-b), (1-a)*b, a*(1-b), a*b */
|
|
|
|
|
__m128 abab = _mm_sub_ps(uvuv, uvuv_floor);
|
|
|
|
|
__m128 m_abab = _mm_sub_ps(_mm_set1_ps(1.0f), abab);
|
|
|
|
|
__m128 ab_mab = _mm_shuffle_ps(abab, m_abab, _MM_SHUFFLE(3, 2, 1, 0));
|
|
|
|
|
__m128 factors = _mm_mul_ps(_mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(0, 0, 2, 2)),
|
|
|
|
|
_mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(1, 3, 1, 3)));
|
|
|
|
|
|
|
|
|
|
/* Blend the samples. */
|
|
|
|
|
rgba1 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(0, 0, 0, 0)), rgba1);
|
|
|
|
|
rgba2 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(1, 1, 1, 1)), rgba2);
|
|
|
|
|
rgba3 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(2, 2, 2, 2)), rgba3);
|
|
|
|
|
rgba4 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(3, 3, 3, 3)), rgba4);
|
|
|
|
|
__m128 rgba13 = _mm_add_ps(rgba1, rgba3);
|
|
|
|
|
__m128 rgba24 = _mm_add_ps(rgba2, rgba4);
|
|
|
|
|
__m128 rgba = _mm_add_ps(rgba13, rgba24);
|
|
|
|
|
rgba = _mm_add_ps(rgba, _mm_set1_ps(0.5f));
|
|
|
|
|
/* Pack and write to destination: pack to 16 bit signed, then to 8 bit
|
|
|
|
|
* unsigned, then write resulting 32-bit value. */
|
|
|
|
|
__m128i rgba32 = _mm_cvttps_epi32(rgba);
|
|
|
|
|
__m128i rgba16 = _mm_packs_epi32(rgba32, _mm_setzero_si128());
|
|
|
|
|
__m128i rgba8 = _mm_packus_epi16(rgba16, _mm_setzero_si128());
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
_mm_store_ss((float *)&res, _mm_castsi128_ps(rgba8));
|
2023-12-14 15:10:30 +01:00
|
|
|
|
|
|
|
|
#else
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2023-12-14 15:10:30 +01:00
|
|
|
float uf = floorf(u);
|
|
|
|
|
float vf = floorf(v);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-02-02 10:43:18 +11:00
|
|
|
int x1 = int(uf);
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
int x2 = x1 + 1;
|
2024-02-02 10:43:18 +11:00
|
|
|
int y1 = int(vf);
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
int y2 = y1 + 1;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-03-08 07:50:01 +01:00
|
|
|
/* Completely outside of the image in bordered mode? */
|
|
|
|
|
if (border && (x2 < 0 || x1 >= width || y2 < 0 || y1 >= height)) {
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
return uchar4(0);
|
2023-12-14 15:10:30 +01:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-02-02 16:28:51 +01:00
|
|
|
/* Sample locations. */
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
const uchar *row1, *row2, *row3, *row4;
|
|
|
|
|
uchar empty[4] = {0, 0, 0, 0};
|
2024-02-02 16:28:51 +01:00
|
|
|
if constexpr (border) {
|
|
|
|
|
row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * 4;
|
|
|
|
|
row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * 4;
|
|
|
|
|
row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * 4;
|
|
|
|
|
row4 = (x2 > width - 1 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x2) * 4;
|
2023-12-14 15:10:30 +01:00
|
|
|
}
|
|
|
|
|
else {
|
2024-02-02 16:28:51 +01:00
|
|
|
x1 = blender::math::clamp(x1, 0, width - 1);
|
|
|
|
|
x2 = blender::math::clamp(x2, 0, width - 1);
|
|
|
|
|
y1 = blender::math::clamp(y1, 0, height - 1);
|
|
|
|
|
y2 = blender::math::clamp(y2, 0, height - 1);
|
|
|
|
|
row1 = buffer + (int64_t(width) * y1 + x1) * 4;
|
|
|
|
|
row2 = buffer + (int64_t(width) * y2 + x1) * 4;
|
|
|
|
|
row3 = buffer + (int64_t(width) * y1 + x2) * 4;
|
|
|
|
|
row4 = buffer + (int64_t(width) * y2 + x2) * 4;
|
2012-11-10 19:11:25 +00:00
|
|
|
}
|
2023-12-14 15:10:30 +01:00
|
|
|
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4) variants, as well as
variants taking an explicitly passed number of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- The same functions exist in `IMB_interp.hh`; they are just convenience wrappers
that take an ImBuf and use its data pointer, width and height.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
float a = u - uf;
|
|
|
|
|
float b = v - vf;
|
|
|
|
|
float a_b = a * b;
|
|
|
|
|
float ma_b = (1.0f - a) * b;
|
|
|
|
|
float a_mb = a * (1.0f - b);
|
|
|
|
|
float ma_mb = (1.0f - a) * (1.0f - b);
|
2023-12-14 15:10:30 +01:00
|
|
|
|
2024-02-02 10:43:18 +11:00
|
|
|
res.x = uchar(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + 0.5f);
|
|
|
|
|
res.y = uchar(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + 0.5f);
|
|
|
|
|
res.z = uchar(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + 0.5f);
|
|
|
|
|
res.w = uchar(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] + 0.5f);
|
2023-12-14 15:10:30 +01:00
|
|
|
#endif
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
|
|
|
|
|
return res;
|
2012-11-10 19:11:25 +00:00
|
|
|
}
|
2012-11-11 08:48:35 +00:00
|
|
|
|
2024-02-02 16:28:51 +01:00
|
|
|
/**
 * Bilinear sample of a 4-channel byte image at location (u, v), "border" variant.
 *
 * Thin wrapper: all of the work is done by #bilinear_byte_impl, instantiated with its
 * boolean template argument set to `true` (the non-border variant passes `false`).
 */
uchar4 interpolate_bilinear_border_byte(
    const uchar *buffer, int width, int height, float u, float v)
{
  /* Template flag of the shared implementation; presumably selects border handling. */
  constexpr bool use_border = true;
  return bilinear_byte_impl<use_border>(buffer, width, height, u, v);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Bilinear sample of a 4-channel byte image at location (u, v).
 *
 * Thin wrapper: forwards to #bilinear_byte_impl instantiated with its boolean template
 * argument set to `false` (the "border" variant passes `true`).
 */
uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
{
  /* Template flag of the shared implementation; presumably disables border handling. */
  constexpr bool use_border = false;
  return bilinear_byte_impl<use_border>(buffer, width, height, u, v);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Bilinear sample of a 4-channel float image at location (u, v), "border" variant.
 *
 * Forwards to #bilinear_fl_impl (boolean template argument `true`) with a fixed channel
 * count of 4 and returns the pixel by value.
 */
float4 interpolate_bilinear_border_fl(const float *buffer, int width, int height, float u, float v)
{
  constexpr bool use_border = true;
  constexpr int channels = 4;

  /* Filled in by the implementation. */
  float4 pixel;
  bilinear_fl_impl<use_border>(buffer, pixel, width, height, channels, u, v);
  return pixel;
}
|
|
|
|
|
|
|
|
|
|
void interpolate_bilinear_border_fl(
|
|
|
|
|
const float *buffer, float *output, int width, int height, int components, float u, float v)
|
|
|
|
|
{
|
|
|
|
|
bilinear_fl_impl<true>(buffer, output, width, height, components, u, v);
|
|
|
|
|
}
|
|
|
|
|
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
/**
 * Bilinear sample of a 4-channel float image at location (u, v).
 *
 * Forwards to #bilinear_fl_impl (boolean template argument `false`) with a fixed channel
 * count of 4 and returns the pixel by value.
 */
float4 interpolate_bilinear_fl(const float *buffer, int width, int height, float u, float v)
{
  constexpr bool use_border = false;
  constexpr int channels = 4;

  /* Filled in by the implementation. */
  float4 pixel;
  bilinear_fl_impl<use_border>(buffer, pixel, width, height, channels, u, v);
  return pixel;
}
|
|
|
|
|
|
|
|
|
|
void interpolate_bilinear_fl(
|
2012-11-11 08:48:35 +00:00
|
|
|
const float *buffer, float *output, int width, int height, int components, float u, float v)
|
|
|
|
|
{
|
2024-02-02 16:28:51 +01:00
|
|
|
bilinear_fl_impl<false>(buffer, output, width, height, components, u, v);
|
2012-11-11 08:48:35 +00:00
|
|
|
}
|
2014-08-18 19:37:13 +06:00
|
|
|
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
void interpolate_bilinear_wrap_fl(const float *buffer,
|
|
|
|
|
float *output,
|
|
|
|
|
int width,
|
|
|
|
|
int height,
|
|
|
|
|
int components,
|
|
|
|
|
float u,
|
|
|
|
|
float v,
|
|
|
|
|
bool wrap_x,
|
|
|
|
|
bool wrap_y)
|
2016-02-18 01:28:41 +11:00
|
|
|
{
|
2024-02-02 16:28:51 +01:00
|
|
|
bilinear_fl_impl<false>(buffer, output, width, height, components, u, v, wrap_x, wrap_y);
|
2016-02-18 01:28:41 +11:00
|
|
|
}
|
|
|
|
|
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height, float u, float v)
|
|
|
|
|
{
|
|
|
|
|
u = floored_fmod(u, float(width));
|
|
|
|
|
v = floored_fmod(v, float(height));
|
|
|
|
|
float uf = floorf(u);
|
|
|
|
|
float vf = floorf(v);
|
|
|
|
|
|
2024-02-02 10:43:18 +11:00
|
|
|
int x1 = int(uf);
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
int x2 = x1 + 1;
|
2024-02-02 10:43:18 +11:00
|
|
|
int y1 = int(vf);
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
int y2 = y1 + 1;
|
|
|
|
|
|
|
|
|
|
/* Wrap interpolation pixels if needed. */
|
|
|
|
|
BLI_assert(x1 >= 0 && x1 < width && y1 >= 0 && y1 < height);
|
|
|
|
|
if (x2 >= width) {
|
|
|
|
|
x2 = 0;
|
|
|
|
|
}
|
|
|
|
|
if (y2 >= height) {
|
|
|
|
|
y2 = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
float a = u - uf;
|
|
|
|
|
float b = v - vf;
|
|
|
|
|
float a_b = a * b;
|
|
|
|
|
float ma_b = (1.0f - a) * b;
|
|
|
|
|
float a_mb = a * (1.0f - b);
|
|
|
|
|
float ma_mb = (1.0f - a) * (1.0f - b);
|
|
|
|
|
|
|
|
|
|
/* Blend samples. */
|
|
|
|
|
const uchar *row1 = buffer + (int64_t(width) * y1 + x1) * 4;
|
|
|
|
|
const uchar *row2 = buffer + (int64_t(width) * y2 + x1) * 4;
|
|
|
|
|
const uchar *row3 = buffer + (int64_t(width) * y1 + x2) * 4;
|
|
|
|
|
const uchar *row4 = buffer + (int64_t(width) * y2 + x2) * 4;
|
|
|
|
|
|
|
|
|
|
uchar4 res;
|
|
|
|
|
res.x = uchar(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + 0.5f);
|
|
|
|
|
res.y = uchar(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + 0.5f);
|
|
|
|
|
res.z = uchar(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + 0.5f);
|
|
|
|
|
res.w = uchar(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] + 0.5f);
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Bilinear sample of a 4-channel float image at location (u, v), wrapping on both axes.
 *
 * Forwards to #bilinear_fl_impl (boolean template argument `false`) with a fixed channel
 * count of 4 and both wrap flags enabled, and returns the pixel by value.
 */
float4 interpolate_bilinear_wrap_fl(const float *buffer, int width, int height, float u, float v)
{
  constexpr bool use_border = false;
  constexpr int channels = 4;
  constexpr bool wrap_x = true;
  constexpr bool wrap_y = true;

  /* Filled in by the implementation. */
  float4 pixel;
  bilinear_fl_impl<use_border>(buffer, pixel, width, height, channels, u, v, wrap_x, wrap_y);
  return pixel;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Cubic B-Spline sample of a 4-channel byte image at location (u, v).
 *
 * Forwards to #bicubic_interpolation instantiated for `uchar` data and the
 * `eCubicFilter::BSpline` filter, with a fixed channel count of 4.
 */
uchar4 interpolate_cubic_bspline_byte(const uchar *buffer, int width, int height, float u, float v)
{
  constexpr eCubicFilter filter = eCubicFilter::BSpline;
  constexpr int channels = 4;

  /* Filled in by the implementation. */
  uchar4 pixel;
  bicubic_interpolation<uchar, filter>(buffer, pixel, width, height, channels, u, v);
  return pixel;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Cubic B-Spline sample of a 4-channel float image at location (u, v).
 *
 * Forwards to #bicubic_interpolation instantiated for `float` data and the
 * `eCubicFilter::BSpline` filter, with a fixed channel count of 4.
 */
float4 interpolate_cubic_bspline_fl(const float *buffer, int width, int height, float u, float v)
{
  constexpr eCubicFilter filter = eCubicFilter::BSpline;
  constexpr int channels = 4;

  /* Filled in by the implementation. */
  float4 pixel;
  bicubic_interpolation<float, filter>(buffer, pixel, width, height, channels, u, v);
  return pixel;
}
|
|
|
|
|
|
|
|
|
|
void interpolate_cubic_bspline_fl(
|
|
|
|
|
const float *buffer, float *output, int width, int height, int components, float u, float v)
|
|
|
|
|
{
|
2024-01-26 11:57:19 +01:00
|
|
|
bicubic_interpolation<float, eCubicFilter::BSpline>(
|
|
|
|
|
buffer, output, width, height, components, u, v);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Cubic Mitchell sample of a 4-channel byte image at location (u, v).
 *
 * Forwards to #bicubic_interpolation instantiated for `uchar` data and the
 * `eCubicFilter::Mitchell` filter, with a fixed channel count of 4.
 */
uchar4 interpolate_cubic_mitchell_byte(
    const uchar *buffer, int width, int height, float u, float v)
{
  constexpr eCubicFilter filter = eCubicFilter::Mitchell;
  constexpr int channels = 4;

  /* Filled in by the implementation. */
  uchar4 pixel;
  bicubic_interpolation<uchar, filter>(buffer, pixel, width, height, channels, u, v);
  return pixel;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Cubic Mitchell sample of a 4-channel float image at location (u, v).
 *
 * Forwards to #bicubic_interpolation instantiated for `float` data and the
 * `eCubicFilter::Mitchell` filter, with a fixed channel count of 4.
 */
float4 interpolate_cubic_mitchell_fl(const float *buffer, int width, int height, float u, float v)
{
  constexpr eCubicFilter filter = eCubicFilter::Mitchell;
  constexpr int channels = 4;

  /* Filled in by the implementation. */
  float4 pixel;
  bicubic_interpolation<float, filter>(buffer, pixel, width, height, channels, u, v);
  return pixel;
}
|
|
|
|
|
|
|
|
|
|
void interpolate_cubic_mitchell_fl(
|
|
|
|
|
const float *buffer, float *output, int width, int height, int components, float u, float v)
|
|
|
|
|
{
|
|
|
|
|
bicubic_interpolation<float, eCubicFilter::Mitchell>(
|
|
|
|
|
buffer, output, width, height, components, u, v);
|
ImBuf: Refactor pixel interpolation functions
There exist a bunch of "give me a (filtered) image pixel at this location"
functions, some with duplicated functionality, some with almost the same but
not quite, some that look similar but behave slightly differently, etc.
Some of them were in BLI, some were in ImBuf.
This commit tries to improve the situation by:
* Adding low level interpolation functions to `BLI_math_interp.hh`
- With documentation on their behavior,
- And with more unit tests.
* At `ImBuf` level, there are only convenience inline wrappers to the above BLI
functions (split off into a separate header `IMB_interp.hh`). However, since
these wrappers are inline, some things get a tiny bit faster as a side
effect. E.g. VSE image strip, scaling to 4K resolution (Windows/Ryzen5950X):
- Nearest filter: 2.33 -> 1.94ms
- Bilinear filter: 5.83 -> 5.69ms
- Subsampled3x3 filter: 28.6 -> 22.4ms
Details on the functions:
- All of them have `_byte` and `_fl` suffixes.
- They exist in 4-channel byte (uchar4) and float (float4), as well as
explicitly passed amount of channels for other float images.
- New functions in BLI `blender::math` namespace:
- `interpolate_nearest`
- `interpolate_bilinear`
- `interpolate_bilinear_wrap`. Note that unlike previous "wrap" function,
this one no longer requires the caller to do their own wrapping.
- `interpolate_cubic_bspline`. Previous similar function was called just
"bicubic" which could mean many different things.
- Same functions exist in `IMB_interp.hh`, they are just convenience that takes
ImBuf and uses data pointer, width, height from that.
Other bits:
- Renamed `mod_f_positive` to `floored_fmod` (better matches `safe_floored_modf`
and `floored_modulo` that exist elsewhere), made it branchless and added more
unit tests.
- `interpolate_bilinear_wrap_fl` no longer clamps result to 0..1 range. Instead,
moved the clamp to be outside of the call in `paint_image_proj.cc` and
`paint_utils.cc`. Though the need for clamping in there is also questionable.
Pull Request: https://projects.blender.org/blender/blender/pulls/117387
2024-01-25 11:45:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace blender::math
|
|
|
|
|
|
2014-08-18 19:37:13 +06:00
|
|
|
/**************************************************************************
|
|
|
|
|
* Filtering method based on
|
2019-01-15 23:15:58 +11:00
|
|
|
* "Creating raster omnimax images from multiple perspective views
|
|
|
|
|
* using the elliptical weighted average filter"
|
2014-08-18 19:37:13 +06:00
|
|
|
* by Ned Greene and Paul S. Heckbert (1986)
|
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
2023-07-07 15:11:19 +10:00
|
|
|
/* Filter weight lookup table: entry `i` holds `(exp(a * r) - exp(a)) / (1 - exp(a))` with
 * `a = -2` and `r = i / EWA_MAXIDX`, so the weights fall from 1 (at `r = 0`) to 0 (at `r = 1`).
 * This curve is used in place of a true gaussian, which shows circular artifacts at high
 * texture magnifications. Values are listed 8 per row, in index order. */
#define EWA_MAXIDX 255
const float EWA_WTS[EWA_MAXIDX + 1] = {
    1.0f,        0.990965f,   0.982f,      0.973105f,   0.96428f,    0.955524f,   0.946836f,
    0.938216f, /* 0..7 */
    0.929664f,   0.921178f,   0.912759f,   0.904405f,   0.896117f,   0.887893f,   0.879734f,
    0.871638f, /* 8..15 */
    0.863605f,   0.855636f,   0.847728f,   0.839883f,   0.832098f,   0.824375f,   0.816712f,
    0.809108f, /* 16..23 */
    0.801564f,   0.794079f,   0.786653f,   0.779284f,   0.771974f,   0.76472f,    0.757523f,
    0.750382f, /* 24..31 */
    0.743297f,   0.736267f,   0.729292f,   0.722372f,   0.715505f,   0.708693f,   0.701933f,
    0.695227f, /* 32..39 */
    0.688572f,   0.68197f,    0.67542f,    0.66892f,    0.662471f,   0.656073f,   0.649725f,
    0.643426f, /* 40..47 */
    0.637176f,   0.630976f,   0.624824f,   0.618719f,   0.612663f,   0.606654f,   0.600691f,
    0.594776f, /* 48..55 */
    0.588906f,   0.583083f,   0.577305f,   0.571572f,   0.565883f,   0.56024f,    0.55464f,
    0.549084f, /* 56..63 */
    0.543572f,   0.538102f,   0.532676f,   0.527291f,   0.521949f,   0.516649f,   0.511389f,
    0.506171f, /* 64..71 */
    0.500994f,   0.495857f,   0.490761f,   0.485704f,   0.480687f,   0.475709f,   0.470769f,
    0.465869f, /* 72..79 */
    0.461006f,   0.456182f,   0.451395f,   0.446646f,   0.441934f,   0.437258f,   0.432619f,
    0.428017f, /* 80..87 */
    0.42345f,    0.418919f,   0.414424f,   0.409963f,   0.405538f,   0.401147f,   0.39679f,
    0.392467f, /* 88..95 */
    0.388178f,   0.383923f,   0.379701f,   0.375511f,   0.371355f,   0.367231f,   0.363139f,
    0.359079f, /* 96..103 */
    0.355051f,   0.351055f,   0.347089f,   0.343155f,   0.339251f,   0.335378f,   0.331535f,
    0.327722f, /* 104..111 */
    0.323939f,   0.320186f,   0.316461f,   0.312766f,   0.3091f,     0.305462f,   0.301853f,
    0.298272f, /* 112..119 */
    0.294719f,   0.291194f,   0.287696f,   0.284226f,   0.280782f,   0.277366f,   0.273976f,
    0.270613f, /* 120..127 */
    0.267276f,   0.263965f,   0.26068f,    0.257421f,   0.254187f,   0.250979f,   0.247795f,
    0.244636f, /* 128..135 */
    0.241502f,   0.238393f,   0.235308f,   0.232246f,   0.229209f,   0.226196f,   0.223206f,
    0.220239f, /* 136..143 */
    0.217296f,   0.214375f,   0.211478f,   0.208603f,   0.20575f,    0.20292f,    0.200112f,
    0.197326f, /* 144..151 */
    0.194562f,   0.191819f,   0.189097f,   0.186397f,   0.183718f,   0.18106f,    0.178423f,
    0.175806f, /* 152..159 */
    0.17321f,    0.170634f,   0.168078f,   0.165542f,   0.163026f,   0.16053f,    0.158053f,
    0.155595f, /* 160..167 */
    0.153157f,   0.150738f,   0.148337f,   0.145955f,   0.143592f,   0.141248f,   0.138921f,
    0.136613f, /* 168..175 */
    0.134323f,   0.132051f,   0.129797f,   0.12756f,    0.125341f,   0.123139f,   0.120954f,
    0.118786f, /* 176..183 */
    0.116635f,   0.114501f,   0.112384f,   0.110283f,   0.108199f,   0.106131f,   0.104079f,
    0.102043f, /* 184..191 */
    0.100023f,   0.0980186f,  0.09603f,    0.094057f,   0.0920994f,  0.0901571f,  0.08823f,
    0.0863179f, /* 192..199 */
    0.0844208f,  0.0825384f,  0.0806708f,  0.0788178f,  0.0769792f,  0.0751551f,  0.0733451f,
    0.0715493f, /* 200..207 */
    0.0697676f,  0.0679997f,  0.0662457f,  0.0645054f,  0.0627786f,  0.0610654f,  0.0593655f,
    0.0576789f, /* 208..215 */
    0.0560055f,  0.0543452f,  0.0526979f,  0.0510634f,  0.0494416f,  0.0478326f,  0.0462361f,
    0.0446521f, /* 216..223 */
    0.0430805f,  0.0415211f,  0.039974f,   0.0384389f,  0.0369158f,  0.0354046f,  0.0339052f,
    0.0324175f, /* 224..231 */
    0.0309415f,  0.029477f,   0.0280239f,  0.0265822f,  0.0251517f,  0.0237324f,  0.0223242f,
    0.020927f, /* 232..239 */
    0.0195408f,  0.0181653f,  0.0168006f,  0.0154466f,  0.0141031f,  0.0127701f,  0.0114476f,
    0.0101354f, /* 240..247 */
    0.00883339f, 0.00754159f, 0.00625989f, 0.00498819f, 0.00372644f, 0.00247454f, 0.00123242f,
    0.0f, /* 248..255 */
};
|
|
|
|
|
|
|
|
|
|
/**
 * Convert an ellipse described by its squared radii and a rotation angle into the
 * implicit coefficients `A`, `B`, `C` and `F`.
 *
 * \param a2, b2: Squared radii of the ellipse.
 * \param th: Rotation angle in radians.
 * \param A, B, C, F: Output coefficients, all four are always written.
 */
static void radangle2imp(float a2, float b2, float th, float *A, float *B, float *C, float *F)
{
  const float cos_th = cosf(th);
  /* `sin(th)^2`, derived from the cosine so no extra sine evaluation is needed for it. */
  const float sin_sq = 1.0f - cos_th * cos_th;
  const float cos_sq = cos_th * cos_th;

  *F = a2 * b2;
  *C = a2 * cos_sq + b2 * sin_sq;
  *B = (b2 - a2) * sinf(2.0f * th);
  *A = a2 * sin_sq + b2 * cos_sq;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Convert the implicit ellipse coefficients `A`, `B`, `C`, `F` into radii, a rotation
 * angle and an eccentricity value.
 *
 * \param a: Receives the major radius.
 * \param b: Receives the minor radius (zero in the degenerate cases).
 * \param th: Receives the angle of the major axis, in radians.
 * \param ecc: Receives `a / b`, or `1e10f` whenever the minor radius is set to zero.
 */
void BLI_ewa_imp2radangle(
    float A, float B, float C, float F, float *a, float *b, float *th, float *ecc)
{
  /* NOTE: the checks in this function exist to keep the risk of overflow as low as possible. */
  const float AmC = A - C;
  /* Used as squared major radius whenever the regular formula can not be applied. */
  const float fallback_radius_sq = (A > C) ? A : C;

  float major, minor, eccentricity;

  if (F <= 1e-5f) {
    /* Use arbitrary major radius, zero minor, infinite eccentricity. */
    major = sqrtf(fallback_radius_sq);
    minor = 0.0f;
    eccentricity = 1e10f;
  }
  else {
    const float ApC = A + C;
    const float F2 = F * 2.0f;
    const float r = sqrtf(AmC * AmC + B * B);
    const float d_major = ApC - r;
    const float d_minor = ApC + r;

    if (d_major <= 0.0f) {
      major = sqrtf(fallback_radius_sq);
    }
    else {
      major = sqrtf(F2 / d_major);
    }

    if (d_minor <= 0.0f) {
      minor = 0.0f;
      eccentricity = 1e10f;
    }
    else {
      minor = sqrtf(F2 / d_minor);
      eccentricity = major / minor;
    }
  }

  *a = major;
  *b = minor;
  *ecc = eccentricity;
  /* Same expression for every case: half of the `atan2f` angle, increased by `0.5 * pi`
   * (angle of major axis). */
  *th = 0.5f * (atan2f(B, AmC) + float(M_PI));
}
|
|
|
|
|
|
|
|
|
|
void BLI_ewa_filter(const int width,
|
|
|
|
|
const int height,
|
|
|
|
|
const bool intpol,
|
|
|
|
|
const bool use_alpha,
|
|
|
|
|
const float uv[2],
|
|
|
|
|
const float du[2],
|
|
|
|
|
const float dv[2],
|
|
|
|
|
ewa_filter_read_pixel_cb read_pixel_cb,
|
|
|
|
|
void *userdata,
|
|
|
|
|
float result[4])
|
|
|
|
|
{
|
2024-01-18 10:37:49 +11:00
|
|
|
/* Scaling `dxt` / `dyt` by full resolution can cause overflow because of huge A/B/C and esp.
|
|
|
|
|
* F values, scaling by aspect ratio alone does the opposite, so try something in between
|
|
|
|
|
* instead. */
|
2024-02-02 10:43:18 +11:00
|
|
|
const float ff2 = float(width), ff = sqrtf(ff2), q = float(height) / ff;
|
2014-10-31 12:35:20 +01:00
|
|
|
const float Ux = du[0] * ff, Vx = du[1] * q, Uy = dv[0] * ff, Vy = dv[1] * q;
|
2014-08-18 19:37:13 +06:00
|
|
|
float A = Vx * Vx + Vy * Vy;
|
|
|
|
|
float B = -2.0f * (Ux * Vx + Uy * Vy);
|
|
|
|
|
float C = Ux * Ux + Uy * Uy;
|
|
|
|
|
float F = A * C - B * B * 0.25f;
|
|
|
|
|
float a, b, th, ecc, a2, b2, ue, ve, U0, V0, DDQ, U, ac1, ac2, BU, d;
|
|
|
|
|
int u, v, u1, u2, v1, v2;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-01-18 10:37:49 +11:00
|
|
|
/* The so-called 'high' quality EWA method simply adds a constant of 1 to both A & C,
|
2014-08-18 19:37:13 +06:00
|
|
|
* so the ellipse always covers at least some texels. But since the filter is now always larger,
|
|
|
|
|
* it also means that everywhere else it's also more blurry then ideally should be the case.
|
|
|
|
|
* So instead here the ellipse radii are modified instead whenever either is too low.
|
2019-04-22 00:54:27 +10:00
|
|
|
* Use a different radius based on interpolation switch,
|
|
|
|
|
* just enough to anti-alias when interpolation is off,
|
|
|
|
|
* and slightly larger to make result a bit smoother than bilinear interpolation when
|
2021-07-23 16:56:00 +10:00
|
|
|
* interpolation is on (minimum values: `const float rmin = intpol ? 1.0f : 0.5f;`) */
|
2014-08-20 08:47:32 +10:00
|
|
|
const float rmin = (intpol ? 1.5625f : 0.765625f) / ff2;
|
2014-08-18 19:37:13 +06:00
|
|
|
BLI_ewa_imp2radangle(A, B, C, F, &a, &b, &th, &ecc);
|
|
|
|
|
if ((b2 = b * b) < rmin) {
|
2014-08-20 08:47:32 +10:00
|
|
|
if ((a2 = a * a) < rmin) {
|
2024-04-04 10:55:18 +11:00
|
|
|
UNUSED_VARS(a2, b2);
|
2014-08-18 19:37:13 +06:00
|
|
|
B = 0.0f;
|
|
|
|
|
A = C = rmin;
|
|
|
|
|
F = A * C;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
b2 = rmin;
|
|
|
|
|
radangle2imp(a2, b2, th, &A, &B, &C, &F);
|
2019-04-17 06:17:24 +02:00
|
|
|
}
|
2014-08-18 19:37:13 +06:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2014-08-18 19:37:13 +06:00
|
|
|
ue = ff * sqrtf(C);
|
|
|
|
|
ve = ff * sqrtf(A);
|
2024-01-19 11:52:10 +11:00
|
|
|
d = float(EWA_MAXIDX + 1) / (F * ff2);
|
2014-08-18 19:37:13 +06:00
|
|
|
A *= d;
|
|
|
|
|
B *= d;
|
|
|
|
|
C *= d;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2024-01-19 11:52:10 +11:00
|
|
|
U0 = uv[0] * float(width);
|
|
|
|
|
V0 = uv[1] * float(height);
|
|
|
|
|
u1 = int(floorf(U0 - ue));
|
|
|
|
|
u2 = int(ceilf(U0 + ue));
|
2024-02-02 10:43:18 +11:00
|
|
|
v1 = int(floorf(V0 - ve));
|
|
|
|
|
v2 = int(ceilf(V0 + ve));
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2014-08-18 19:37:13 +06:00
|
|
|
/* sane clamping to avoid unnecessarily huge loops */
|
2021-07-03 23:08:40 +10:00
|
|
|
/* NOTE: if eccentricity gets clamped (see above),
|
2014-08-18 19:37:13 +06:00
|
|
|
* the ue/ve limits can also be lowered accordingly
|
|
|
|
|
*/
|
2024-01-19 11:52:10 +11:00
|
|
|
if (U0 - float(u1) > EWA_MAXIDX) {
|
|
|
|
|
u1 = int(U0) - EWA_MAXIDX;
|
2019-03-27 13:16:10 +11:00
|
|
|
}
|
2024-01-19 11:52:10 +11:00
|
|
|
if (float(u2) - U0 > EWA_MAXIDX) {
|
|
|
|
|
u2 = int(U0) + EWA_MAXIDX;
|
2019-03-27 13:16:10 +11:00
|
|
|
}
|
2024-01-19 11:52:10 +11:00
|
|
|
if (V0 - float(v1) > EWA_MAXIDX) {
|
|
|
|
|
v1 = int(V0) - EWA_MAXIDX;
|
2019-03-27 13:16:10 +11:00
|
|
|
}
|
2024-02-02 10:43:18 +11:00
|
|
|
if (float(v2) - V0 > EWA_MAXIDX) {
|
2024-01-19 11:52:10 +11:00
|
|
|
v2 = int(V0) + EWA_MAXIDX;
|
2019-04-17 06:17:24 +02:00
|
|
|
}
|
|
|
|
|
|
2014-08-18 19:37:13 +06:00
|
|
|
/* Early output check for cases the whole region is outside of the buffer. */
|
|
|
|
|
if ((u2 < 0 || u1 >= width) || (v2 < 0 || v1 >= height)) {
|
|
|
|
|
zero_v4(result);
|
|
|
|
|
return;
|
|
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2014-08-18 19:37:13 +06:00
|
|
|
U0 -= 0.5f;
|
|
|
|
|
V0 -= 0.5f;
|
|
|
|
|
DDQ = 2.0f * A;
|
2024-01-19 11:52:10 +11:00
|
|
|
U = float(u1) - U0;
|
2014-08-18 19:37:13 +06:00
|
|
|
ac1 = A * (2.0f * U + 1.0f);
|
|
|
|
|
ac2 = A * U * U;
|
|
|
|
|
BU = B * U;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2014-08-18 19:37:13 +06:00
|
|
|
d = 0.0f;
|
|
|
|
|
zero_v4(result);
|
2019-09-08 00:12:26 +10:00
|
|
|
for (v = v1; v <= v2; v++) {
|
2024-02-02 10:43:18 +11:00
|
|
|
const float V = float(v) - V0;
|
2014-08-20 08:47:32 +10:00
|
|
|
float DQ = ac1 + B * V;
|
2014-08-18 19:37:13 +06:00
|
|
|
float Q = (C * V + BU) * V + ac2;
|
2019-09-08 00:12:26 +10:00
|
|
|
for (u = u1; u <= u2; u++) {
|
2024-01-19 11:52:10 +11:00
|
|
|
if (Q < float(EWA_MAXIDX + 1)) {
|
2014-08-18 19:37:13 +06:00
|
|
|
float tc[4];
|
2024-01-19 11:52:10 +11:00
|
|
|
const float wt = EWA_WTS[(Q < 0.0f) ? 0 : uint(Q)];
|
2014-08-18 19:37:13 +06:00
|
|
|
read_pixel_cb(userdata, u, v, tc);
|
|
|
|
|
madd_v3_v3fl(result, tc, wt);
|
|
|
|
|
result[3] += use_alpha ? tc[3] * wt : 0.0f;
|
|
|
|
|
d += wt;
|
|
|
|
|
}
|
|
|
|
|
Q += DQ;
|
|
|
|
|
DQ += DDQ;
|
|
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
}
|
|
|
|
|
|
2022-01-31 10:51:33 +11:00
|
|
|
/* `d` should hopefully never be zero anymore. */
|
2014-08-18 19:37:13 +06:00
|
|
|
d = 1.0f / d;
|
|
|
|
|
mul_v3_fl(result, d);
|
2022-01-31 10:51:33 +11:00
|
|
|
/* Clipping can be ignored if alpha used, `texr->trgba[3]` already includes filtered edge. */
|
2014-08-18 19:37:13 +06:00
|
|
|
result[3] = use_alpha ? result[3] * d : 1.0f;
|
|
|
|
|
}
|