Compositor: Port GLSL SMAA to CPU compositor
This patch ports the GLSL SMAA library to the CPU compositor in order to unify the anti-aliasing behavior between the CPU and GPU compositor. Additionally, the SMAA texture generator was removed since it is now unused. Previously, we used an external C++ library for SMAA anti-aliasing, which is itself a port of the GLSL SMAA library. However, the code structure and results of the library were different, which made it quite difficult to match results between CPU and GPU, hence the decision to port the library ourselves. The port was performed through a complete copy of the library to C++, retaining the same function and variable names, even if they are different from Blender's naming conversions. The necessary code changes were done to make it work in C++, including manually doing swizzling which changes the code structure a bit. Even after porting the library, there were still major differences between CPU and GPU, due to different arithmetic precision. To fix this some of the bilinear samplers used in branches and selections were carefully changed to use point samplers to avoid discontinuities around branches, also resulting in a nice performance improvement. Some slight differences still exist due to different bilinear interpolation, but they shall be looked into later once we have a baseline implementation. The new implementation is slower than the existing implementation, most likely due to the liberal use of bilinear interpolation, since it is quite cheap on GPUs and the code even does more work to use bilinear interpolation to avoid multiple texture fetches, except this causes a slow down on CPUs. Some of those were alleviated as mentioned in the previous section, but we can probably look into optimizing it further. Pull Request: https://projects.blender.org/blender/blender/pulls/119414
This commit is contained in:
4
extern/CMakeLists.txt
vendored
4
extern/CMakeLists.txt
vendored
@@ -104,10 +104,6 @@ if(WITH_MOD_FLUID)
|
||||
add_subdirectory(mantaflow)
|
||||
endif()
|
||||
|
||||
if(WITH_COMPOSITOR_CPU)
|
||||
add_subdirectory(smaa_areatex)
|
||||
endif()
|
||||
|
||||
if(WITH_VULKAN_BACKEND)
|
||||
add_subdirectory(vulkan_memory_allocator)
|
||||
endif()
|
||||
|
||||
5
extern/smaa_areatex/CMakeLists.txt
vendored
5
extern/smaa_areatex/CMakeLists.txt
vendored
@@ -1,5 +0,0 @@
|
||||
# SPDX-FileCopyrightText: 2017 Blender Foundation
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
add_executable(smaa_areatex smaa_areatex.cpp)
|
||||
5
extern/smaa_areatex/README.blender
vendored
5
extern/smaa_areatex/README.blender
vendored
@@ -1,5 +0,0 @@
|
||||
Project: smaa-cpp
|
||||
URL: https://github.com/iRi-E/smaa-cpp
|
||||
License: MIT
|
||||
Upstream version: 0.4.0
|
||||
Local modifications:
|
||||
1210
extern/smaa_areatex/smaa_areatex.cpp
vendored
1210
extern/smaa_areatex/smaa_areatex.cpp
vendored
@@ -1,1210 +0,0 @@
|
||||
/**
|
||||
* Copyright (C) 2016-2017 IRIE Shinsuke
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* smaa_areatex.cpp version 0.4.0
|
||||
*
|
||||
* This is a part of smaa-cpp that is an implementation of
|
||||
* Enhanced Subpixel Morphological Antialiasing (SMAA) written in C++.
|
||||
*
|
||||
* This program is C++ rewrite of AreaTex.py included in the original
|
||||
* SMAA ditribution:
|
||||
*
|
||||
* https://github.com/iryoku/smaa/tree/master/Scripts
|
||||
*/
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
/*------------------------------------------------------------------------------*/
|
||||
/* Type Definitions */
|
||||
|
||||
class Int2;
|
||||
class Dbl2;
|
||||
|
||||
class Int2 {
|
||||
public:
|
||||
int x, y;
|
||||
|
||||
Int2() { this->x = this->y = 0; }
|
||||
Int2(int x) { this->x = this->y = x; }
|
||||
Int2(int x, int y) { this->x = x; this->y = y; }
|
||||
|
||||
operator Dbl2();
|
||||
|
||||
Int2 operator + (Int2 other) { return Int2(x + other.x, y + other.y); }
|
||||
Int2 operator * (Int2 other) { return Int2(x * other.x, y * other.y); }
|
||||
};
|
||||
|
||||
class Dbl2 {
|
||||
public:
|
||||
double x, y;
|
||||
|
||||
Dbl2() { this->x = this->y = 0.0; }
|
||||
Dbl2(double x) { this->x = this->y = x; }
|
||||
Dbl2(double x, double y) { this->x = x; this->y = y; }
|
||||
|
||||
Dbl2 apply(double (* func)(double)) { return Dbl2(func(x), func(y)); }
|
||||
|
||||
operator Int2();
|
||||
|
||||
Dbl2 operator + (Dbl2 other) { return Dbl2(x + other.x, y + other.y); }
|
||||
Dbl2 operator - (Dbl2 other) { return Dbl2(x - other.x, y - other.y); }
|
||||
Dbl2 operator * (Dbl2 other) { return Dbl2(x * other.x, y * other.y); }
|
||||
Dbl2 operator / (Dbl2 other) { return Dbl2(x / other.x, y / other.y); }
|
||||
Dbl2 operator += (Dbl2 other) { return Dbl2(x += other.x, y += other.y); }
|
||||
bool operator == (Dbl2 other) { return (x == other.x && y == other.y); }
|
||||
};
|
||||
|
||||
Int2::operator Dbl2() { return Dbl2((double)x, (double)y); }
|
||||
Dbl2::operator Int2() { return Int2((int)x, (int)y); }
|
||||
|
||||
/*------------------------------------------------------------------------------*/
|
||||
/* Data to Calculate Areatex */
|
||||
|
||||
/* Texture sizes: */
|
||||
/* (it's quite possible that this is not easily configurable) */
|
||||
static const int SUBSAMPLES_ORTHO = 7;
|
||||
static const int SUBSAMPLES_DIAG = 5;
|
||||
static const int MAX_DIST_ORTHO_COMPAT = 16;
|
||||
static const int MAX_DIST_ORTHO = 20;
|
||||
static const int MAX_DIST_DIAG = 20;
|
||||
static const int TEX_SIZE_ORTHO = 80; /* 16 * 5 slots = 80 */
|
||||
static const int TEX_SIZE_DIAG = 80; /* 20 * 4 slots = 80 */
|
||||
|
||||
/* Number of samples for calculating areas in the diagonal textures: */
|
||||
/* (diagonal areas are calculated using brute force sampling) */
|
||||
static const int SAMPLES_DIAG = 30;
|
||||
|
||||
/* Maximum distance for smoothing u-shapes: */
|
||||
static const int SMOOTH_MAX_DISTANCE = 32;
|
||||
|
||||
/*------------------------------------------------------------------------------*/
|
||||
/* Offset Tables */
|
||||
|
||||
/* Offsets for subsample rendering */
|
||||
static const double subsample_offsets_ortho[SUBSAMPLES_ORTHO] = {
|
||||
0.0, /* 0 */
|
||||
-0.25, /* 1 */
|
||||
0.25, /* 2 */
|
||||
-0.125, /* 3 */
|
||||
0.125, /* 4 */
|
||||
-0.375, /* 5 */
|
||||
0.375 /* 6 */
|
||||
};
|
||||
|
||||
static const Dbl2 subsample_offsets_diag[SUBSAMPLES_DIAG] = {
|
||||
{ 0.00, 0.00}, /* 0 */
|
||||
{ 0.25, -0.25}, /* 1 */
|
||||
{-0.25, 0.25}, /* 2 */
|
||||
{ 0.125, -0.125}, /* 3 */
|
||||
{-0.125, 0.125} /* 4 */
|
||||
};
|
||||
|
||||
/* Mapping offsets for placing each pattern subtexture into its place */
|
||||
enum edgesorthoIndices
|
||||
{
|
||||
EDGESORTHO_NONE_NONE = 0,
|
||||
EDGESORTHO_NONE_NEGA = 1,
|
||||
EDGESORTHO_NONE_POSI = 2,
|
||||
EDGESORTHO_NONE_BOTH = 3,
|
||||
EDGESORTHO_NEGA_NONE = 4,
|
||||
EDGESORTHO_NEGA_NEGA = 5,
|
||||
EDGESORTHO_NEGA_POSI = 6,
|
||||
EDGESORTHO_NEGA_BOTH = 7,
|
||||
EDGESORTHO_POSI_NONE = 8,
|
||||
EDGESORTHO_POSI_NEGA = 9,
|
||||
EDGESORTHO_POSI_POSI = 10,
|
||||
EDGESORTHO_POSI_BOTH = 11,
|
||||
EDGESORTHO_BOTH_NONE = 12,
|
||||
EDGESORTHO_BOTH_NEGA = 13,
|
||||
EDGESORTHO_BOTH_POSI = 14,
|
||||
EDGESORTHO_BOTH_BOTH = 15,
|
||||
};
|
||||
|
||||
static const Int2 edgesortho_compat[16] = {
|
||||
{0, 0}, {0, 1}, {0, 3}, {0, 4}, {1, 0}, {1, 1}, {1, 3}, {1, 4},
|
||||
{3, 0}, {3, 1}, {3, 3}, {3, 4}, {4, 0}, {4, 1}, {4, 3}, {4, 4}
|
||||
};
|
||||
|
||||
static const Int2 edgesortho[16] = {
|
||||
{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3},
|
||||
{2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3}
|
||||
};
|
||||
|
||||
enum edgesdiagIndices
|
||||
{
|
||||
EDGESDIAG_NONE_NONE = 0,
|
||||
EDGESDIAG_NONE_VERT = 1,
|
||||
EDGESDIAG_NONE_HORZ = 2,
|
||||
EDGESDIAG_NONE_BOTH = 3,
|
||||
EDGESDIAG_VERT_NONE = 4,
|
||||
EDGESDIAG_VERT_VERT = 5,
|
||||
EDGESDIAG_VERT_HORZ = 6,
|
||||
EDGESDIAG_VERT_BOTH = 7,
|
||||
EDGESDIAG_HORZ_NONE = 8,
|
||||
EDGESDIAG_HORZ_VERT = 9,
|
||||
EDGESDIAG_HORZ_HORZ = 10,
|
||||
EDGESDIAG_HORZ_BOTH = 11,
|
||||
EDGESDIAG_BOTH_NONE = 12,
|
||||
EDGESDIAG_BOTH_VERT = 13,
|
||||
EDGESDIAG_BOTH_HORZ = 14,
|
||||
EDGESDIAG_BOTH_BOTH = 15,
|
||||
};
|
||||
|
||||
static const Int2 edgesdiag[16] = {
|
||||
{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3},
|
||||
{2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3}
|
||||
};
|
||||
|
||||
/*------------------------------------------------------------------------------*/
|
||||
/* Miscellaneous Utility Functions */
|
||||
|
||||
/* Linear interpolation: */
|
||||
static Dbl2 lerp(Dbl2 a, Dbl2 b, double p)
|
||||
{
|
||||
return a + (b - a) * Dbl2(p);
|
||||
}
|
||||
|
||||
/* Saturates a value to [0..1] range: */
|
||||
static double saturate(double x)
|
||||
{
|
||||
return 0.0 < x ? (x < 1.0 ? x : 1.0) : 0.0;
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------------*/
|
||||
/* Horizontal/Vertical Areas */
|
||||
|
||||
class AreaOrtho {
|
||||
double m_data[SUBSAMPLES_ORTHO][TEX_SIZE_ORTHO][TEX_SIZE_ORTHO][2];
|
||||
bool m_compat;
|
||||
bool m_orig_u;
|
||||
public:
|
||||
AreaOrtho(bool compat, bool orig_u) : m_compat(compat), m_orig_u(orig_u) {}
|
||||
|
||||
double *getData() { return (double *)&m_data; }
|
||||
Dbl2 getPixel(int offset_index, Int2 coords) {
|
||||
return Dbl2(m_data[offset_index][coords.y][coords.x][0],
|
||||
m_data[offset_index][coords.y][coords.x][1]);
|
||||
}
|
||||
|
||||
void areaTex(int offset_index);
|
||||
private:
|
||||
void putPixel(int offset_index, Int2 coords, Dbl2 pixel) {
|
||||
m_data[offset_index][coords.y][coords.x][0] = pixel.x;
|
||||
m_data[offset_index][coords.y][coords.x][1] = pixel.y;
|
||||
}
|
||||
|
||||
Dbl2 smoothArea(double d, Dbl2 a1, Dbl2 a2);
|
||||
Dbl2 makeQuad(int x, double d, double o);
|
||||
Dbl2 area(Dbl2 p1, Dbl2 p2, int x);
|
||||
Dbl2 calculate(int pattern, int left, int right, double offset);
|
||||
};
|
||||
|
||||
/* Smoothing function for small u-patterns: */
|
||||
Dbl2 AreaOrtho::smoothArea(double d, Dbl2 a1, Dbl2 a2)
|
||||
{
|
||||
Dbl2 b1 = (a1 * Dbl2(2.0)).apply(sqrt) * Dbl2(0.5);
|
||||
Dbl2 b2 = (a2 * Dbl2(2.0)).apply(sqrt) * Dbl2(0.5);
|
||||
double p = saturate(d / (double)SMOOTH_MAX_DISTANCE);
|
||||
return lerp(b1, a1, p) + lerp(b2, a2, p);
|
||||
}
|
||||
|
||||
/* Smoothing u-patterns by quadratic function: */
|
||||
Dbl2 AreaOrtho::makeQuad(int x, double d, double o)
|
||||
{
|
||||
double r = (double)x;
|
||||
|
||||
/* fmin() below is a trick to smooth tiny u-patterns: */
|
||||
return Dbl2(r, (1.0 - fmin(4.0, d) * r * (d - r) / (d * d)) * o);
|
||||
}
|
||||
|
||||
/* Calculates the area under the line p1->p2, for the pixel x..x+1: */
|
||||
Dbl2 AreaOrtho::area(Dbl2 p1, Dbl2 p2, int x)
|
||||
{
|
||||
Dbl2 d = p2 - p1;
|
||||
double x1 = (double)x;
|
||||
double x2 = x1 + 1.0;
|
||||
|
||||
if ((x1 >= p1.x && x1 < p2.x) || (x2 > p1.x && x2 <= p2.x)) { /* inside? */
|
||||
double y1 = p1.y + (x1 - p1.x) * d.y / d.x;
|
||||
double y2 = p1.y + (x2 - p1.x) * d.y / d.x;
|
||||
|
||||
if ((copysign(1.0, y1) == copysign(1.0, y2) ||
|
||||
fabs(y1) < 1e-4 || fabs(y2) < 1e-4)) { /* trapezoid? */
|
||||
double a = (y1 + y2) / 2.0;
|
||||
if (a < 0.0)
|
||||
return Dbl2(fabs(a), 0.0);
|
||||
else
|
||||
return Dbl2(0.0, fabs(a));
|
||||
}
|
||||
else { /* Then, we got two triangles: */
|
||||
double x = p1.x - p1.y * d.x / d.y, xi;
|
||||
double a1 = x > p1.x ? y1 * modf(x, &xi) / 2.0 : 0.0;
|
||||
double a2 = x < p2.x ? y2 * (1.0 - modf(x, &xi)) / 2.0 : 0.0;
|
||||
double a = fabs(a1) > fabs(a2) ? a1 : -a2;
|
||||
if (a < 0.0)
|
||||
return Dbl2(fabs(a1), fabs(a2));
|
||||
else
|
||||
return Dbl2(fabs(a2), fabs(a1));
|
||||
}
|
||||
}
|
||||
else
|
||||
return Dbl2(0.0, 0.0);
|
||||
}
|
||||
|
||||
/* Calculates the area for a given pattern and distances to the left and to the */
|
||||
/* right, biased by an offset: */
|
||||
Dbl2 AreaOrtho::calculate(int pattern, int left, int right, double offset)
|
||||
{
|
||||
Dbl2 a1, a2;
|
||||
|
||||
/*
|
||||
* o1 |
|
||||
* .-------´
|
||||
* o2 |
|
||||
*
|
||||
* <---d--->
|
||||
*/
|
||||
double d = (double)(left + right + 1);
|
||||
|
||||
double o1 = 0.5 + offset;
|
||||
double o2 = 0.5 + offset - 1.0;
|
||||
|
||||
switch (pattern) {
|
||||
case EDGESORTHO_NONE_NONE:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* ------
|
||||
*
|
||||
*/
|
||||
return Dbl2(0.0, 0.0);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_POSI_NONE:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .------
|
||||
* |
|
||||
*
|
||||
* We only offset L patterns in the crossing edge side, to make it
|
||||
* converge with the unfiltered pattern 0 (we don't want to filter the
|
||||
* pattern 0 to avoid artifacts).
|
||||
*/
|
||||
if (left <= right)
|
||||
return area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left);
|
||||
else
|
||||
return Dbl2(0.0, 0.0);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_NONE_POSI:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* ------.
|
||||
* |
|
||||
*/
|
||||
if (left >= right)
|
||||
return area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left);
|
||||
else
|
||||
return Dbl2(0.0, 0.0);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_POSI_POSI:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .------.
|
||||
* | |
|
||||
*/
|
||||
if (m_orig_u) {
|
||||
a1 = area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left);
|
||||
a2 = area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left);
|
||||
return smoothArea(d, a1, a2);
|
||||
}
|
||||
else
|
||||
return area(makeQuad(left, d, o2), makeQuad(left + 1, d, o2), left);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_NEGA_NONE:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* `------
|
||||
*
|
||||
*/
|
||||
if (left <= right)
|
||||
return area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left);
|
||||
else
|
||||
return Dbl2(0.0, 0.0);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_BOTH_NONE:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* +------
|
||||
* |
|
||||
*/
|
||||
return Dbl2(0.0, 0.0);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_NEGA_POSI:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* `------.
|
||||
* |
|
||||
*
|
||||
* A problem of not offseting L patterns (see above), is that for certain
|
||||
* max search distances, the pixels in the center of a Z pattern will
|
||||
* detect the full Z pattern, while the pixels in the sides will detect a
|
||||
* L pattern. To avoid discontinuities, we blend the full offsetted Z
|
||||
* revectorization with partially offsetted L patterns.
|
||||
*/
|
||||
if (fabs(offset) > 0.0) {
|
||||
a1 = area(Dbl2(0.0, o1), Dbl2(d, o2), left);
|
||||
a2 = area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left);
|
||||
a2 += area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
}
|
||||
else
|
||||
return area(Dbl2(0.0, o1), Dbl2(d, o2), left);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_BOTH_POSI:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* +------.
|
||||
* | |
|
||||
*/
|
||||
return area(Dbl2(0.0, o1), Dbl2(d, o2), left);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_NONE_NEGA:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* ------´
|
||||
*
|
||||
*/
|
||||
if (left >= right)
|
||||
return area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left);
|
||||
else
|
||||
return Dbl2(0.0, 0.0);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_POSI_NEGA:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* .------´
|
||||
* |
|
||||
*/
|
||||
if (fabs(offset) > 0.0) {
|
||||
a1 = area(Dbl2(0.0, o2), Dbl2(d, o1), left);
|
||||
a2 = area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left);
|
||||
a2 += area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
}
|
||||
else
|
||||
return area(Dbl2(0.0, o2), Dbl2(d, o1), left);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_NONE_BOTH:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* ------+
|
||||
* |
|
||||
*/
|
||||
return Dbl2(0.0, 0.0);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_POSI_BOTH:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* .------+
|
||||
* | |
|
||||
*/
|
||||
return area(Dbl2(0.0, o2), Dbl2(d, o1), left);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_NEGA_NEGA:
|
||||
{
|
||||
/*
|
||||
* | |
|
||||
* `------´
|
||||
*
|
||||
*/
|
||||
if (m_orig_u) {
|
||||
a1 = area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left);
|
||||
a2 = area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left);
|
||||
return smoothArea(d, a1, a2);
|
||||
}
|
||||
else
|
||||
return area(makeQuad(left, d, o1), makeQuad(left + 1, d, o1), left);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_BOTH_NEGA:
|
||||
{
|
||||
/*
|
||||
* | |
|
||||
* +------´
|
||||
* |
|
||||
*/
|
||||
return area(Dbl2(0.0, o2), Dbl2(d, o1), left);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_NEGA_BOTH:
|
||||
{
|
||||
/*
|
||||
* | |
|
||||
* `------+
|
||||
* |
|
||||
*/
|
||||
return area(Dbl2(0.0, o1), Dbl2(d, o2), left);
|
||||
break;
|
||||
}
|
||||
case EDGESORTHO_BOTH_BOTH:
|
||||
{
|
||||
/*
|
||||
* | |
|
||||
* +------+
|
||||
* | |
|
||||
*/
|
||||
return Dbl2(0.0, 0.0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return Dbl2(0.0, 0.0);
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------------*/
|
||||
/* Diagonal Areas */
|
||||
|
||||
class AreaDiag {
|
||||
double m_data[SUBSAMPLES_DIAG][TEX_SIZE_DIAG][TEX_SIZE_DIAG][2];
|
||||
bool m_numeric;
|
||||
bool m_orig_u;
|
||||
public:
|
||||
AreaDiag(bool numeric, bool orig_u) : m_numeric(numeric), m_orig_u(orig_u) {}
|
||||
|
||||
double *getData() { return (double *)&m_data; }
|
||||
Dbl2 getPixel(int offset_index, Int2 coords) {
|
||||
return Dbl2(m_data[offset_index][coords.y][coords.x][0],
|
||||
m_data[offset_index][coords.y][coords.x][1]);
|
||||
}
|
||||
|
||||
void areaTex(int offset_index);
|
||||
private:
|
||||
void putPixel(int offset_index, Int2 coords, Dbl2 pixel) {
|
||||
m_data[offset_index][coords.y][coords.x][0] = pixel.x;
|
||||
m_data[offset_index][coords.y][coords.x][1] = pixel.y;
|
||||
}
|
||||
|
||||
double area1(Dbl2 p1, Dbl2 p2, Int2 p);
|
||||
Dbl2 area(Dbl2 p1, Dbl2 p2, int left);
|
||||
Dbl2 areaTriangle(Dbl2 p1L, Dbl2 p2L, Dbl2 p1R, Dbl2 p2R, int left);
|
||||
Dbl2 calculate(int pattern, int left, int right, Dbl2 offset);
|
||||
};
|
||||
|
||||
/* Calculates the area under the line p1->p2 for the pixel 'p' using brute */
|
||||
/* force sampling: */
|
||||
/* (quick and dirty solution, but it works) */
|
||||
double AreaDiag::area1(Dbl2 p1, Dbl2 p2, Int2 p)
|
||||
{
|
||||
if (p1 == p2)
|
||||
return 1.0;
|
||||
|
||||
double xm = (p1.x + p2.x) / 2.0, ym = (p1.y + p2.y) / 2.0;
|
||||
double a = p2.y - p1.y;
|
||||
double b = p1.x - p2.x;
|
||||
int count = 0;
|
||||
|
||||
for (int ix = 0; ix < SAMPLES_DIAG; ix++) {
|
||||
double x = (double)p.x + (double)ix / (double)(SAMPLES_DIAG - 1);
|
||||
for (int iy = 0; iy < SAMPLES_DIAG; iy++) {
|
||||
double y = (double)p.y + (double)iy / (double)(SAMPLES_DIAG - 1);
|
||||
if (a * (x - xm) + b * (y - ym) > 0.0) /* inside? */
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return (double)count / (double)(SAMPLES_DIAG * SAMPLES_DIAG);
|
||||
}
|
||||
|
||||
/* Calculates the area under the line p1->p2: */
|
||||
/* (includes the pixel and its opposite) */
|
||||
Dbl2 AreaDiag::area(Dbl2 p1, Dbl2 p2, int left)
|
||||
{
|
||||
if (m_numeric) {
|
||||
double a1 = area1(p1, p2, Int2(1, 0) + Int2(left));
|
||||
double a2 = area1(p1, p2, Int2(1, 1) + Int2(left));
|
||||
return Dbl2(1.0 - a1, a2);
|
||||
}
|
||||
|
||||
/* Calculates the area under the line p1->p2 for the pixel 'p' analytically */
|
||||
Dbl2 d = p2 - p1;
|
||||
if (d.x == 0.0)
|
||||
return Dbl2(0.0, 1.0);
|
||||
if (d.y == 0.0)
|
||||
return Dbl2(1.0, 0.0);
|
||||
|
||||
double x1 = (double)(1 + left);
|
||||
double x2 = x1 + 1.0;
|
||||
double ymid = x1;
|
||||
double xtop = p1.x + (ymid + 1.0 - p1.y) * d.x / d.y;
|
||||
double xmid = p1.x + (ymid - p1.y) * d.x / d.y;
|
||||
double xbot = p1.x + (ymid - 1.0 - p1.y) * d.x / d.y;
|
||||
|
||||
double y1 = p1.y + (x1 - p1.x) * d.y / d.x;
|
||||
double y2 = p1.y + (x2 - p1.x) * d.y / d.x;
|
||||
double fy1 = y1 - floor(y1);
|
||||
double fy2 = y2 - floor(y2);
|
||||
int iy1 = (int)floor(y1 - ymid);
|
||||
int iy2 = (int)floor(y2 - ymid);
|
||||
|
||||
if (iy1 <= -2) {
|
||||
if (iy2 == -1)
|
||||
return Dbl2(1.0 - (x2 - xbot) * fy2 * 0.5, 0.0);
|
||||
else if (iy2 == 0)
|
||||
return Dbl2((xmid + xbot) * 0.5 - x1, (x2 - xmid) * fy2 * 0.5);
|
||||
else if (iy2 >= 1)
|
||||
return Dbl2((xmid + xbot) * 0.5 - x1, x2 - (xtop + xmid) * 0.5);
|
||||
else /* iy2 < -1 */
|
||||
return Dbl2(1.0, 0.0);
|
||||
}
|
||||
else if (iy1 == -1) {
|
||||
if (iy2 == -1)
|
||||
return Dbl2(1.0 - (fy1 + fy2) * 0.5, 0.0);
|
||||
else if (iy2 == 0)
|
||||
return Dbl2((xmid - x1) * (1.0 - fy1) * 0.5, (x2 - xmid) * fy2 * 0.5);
|
||||
else if (iy2 >= 1)
|
||||
return Dbl2((xmid - x1) * (1.0 - fy1) * 0.5, x2 - (xtop + xmid) * 0.5);
|
||||
else /* iy2 < -1 */
|
||||
return Dbl2(1.0 - (xbot - x1) * fy1 * 0.5, 0.0);
|
||||
}
|
||||
else if (iy1 == 0) {
|
||||
if (iy2 == -1)
|
||||
return Dbl2((x2 - xmid) * (1.0 - fy2) * 0.5, (xmid - x1) * fy1 * 0.5);
|
||||
else if (iy2 == 0)
|
||||
return Dbl2(0.0, (fy1 + fy2) * 0.5);
|
||||
else if (iy2 >= 1)
|
||||
return Dbl2(0.0, 1.0 - (xtop - x1) * (1.0 - fy1) * 0.5);
|
||||
else /* iy2 < -1 */
|
||||
return Dbl2(x2 - (xmid + xbot) * 0.5, (xmid - x1) * fy1 * 0.5);
|
||||
}
|
||||
else { /* iy1 > 0 */
|
||||
if (iy2 == -1)
|
||||
return Dbl2((x2 - xtop) * (1.0 - fy2) * 0.5, (xtop + xmid) * 0.5 - x1);
|
||||
else if (iy2 == 0)
|
||||
return Dbl2(0.0, 1.0 - (x1 - xtop) * (1.0 - fy2) * 0.5);
|
||||
else if (iy2 >= 1)
|
||||
return Dbl2(0.0, 1.0);
|
||||
else /* iy2 < -1 */
|
||||
return Dbl2(x2 - (xmid + xbot) * 0.5, (xtop + xmid) * 0.5 - x1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Calculate u-patterns using a triangle: */
|
||||
Dbl2 AreaDiag::areaTriangle(Dbl2 p1L, Dbl2 p2L, Dbl2 p1R, Dbl2 p2R, int left)
|
||||
{
|
||||
double x1 = (double)(1 + left);
|
||||
double x2 = x1 + 1.0;
|
||||
|
||||
Dbl2 dL = p2L - p1L;
|
||||
Dbl2 dR = p2R - p1R;
|
||||
double xm = ((p1L.x * dL.y / dL.x - p1L.y) - (p1R.x * dR.y / dR.x - p1R.y)) / (dL.y / dL.x - dR.y / dR.x);
|
||||
|
||||
double y1 = (x1 < xm) ? p1L.y + (x1 - p1L.x) * dL.y / dL.x : p1R.y + (x1 - p1R.x) * dR.y / dR.x;
|
||||
double y2 = (x2 < xm) ? p1L.y + (x2 - p1L.x) * dL.y / dL.x : p1R.y + (x2 - p1R.x) * dR.y / dR.x;
|
||||
|
||||
return area(Dbl2(x1, y1), Dbl2(x2, y2), left);
|
||||
}
|
||||
|
||||
/* Calculates the area for a given pattern and distances to the left and to the */
|
||||
/* right, biased by an offset: */
|
||||
Dbl2 AreaDiag::calculate(int pattern, int left, int right, Dbl2 offset)
|
||||
{
|
||||
Dbl2 a1, a2;
|
||||
|
||||
double d = (double)(left + right + 1);
|
||||
|
||||
/*
|
||||
* There is some Black Magic around diagonal area calculations. Unlike
|
||||
* orthogonal patterns, the 'null' pattern (one without crossing edges) must be
|
||||
* filtered, and the ends of both the 'null' and L patterns are not known: L
|
||||
* and U patterns have different endings, and we don't know what is the
|
||||
* adjacent pattern. So, what we do is calculate a blend of both possibilites.
|
||||
*/
|
||||
switch (pattern) {
|
||||
case EDGESDIAG_NONE_NONE:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* ´
|
||||
*
|
||||
*/
|
||||
a1 = area(Dbl2(1.0, 1.0), Dbl2(1.0, 1.0) + Dbl2(d), left); /* 1st possibility */
|
||||
a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d), left); /* 2nd possibility */
|
||||
return (a1 + a2) / Dbl2(2.0); /* Blend them */
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_VERT_NONE:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* |
|
||||
* |
|
||||
*/
|
||||
a1 = area(Dbl2(1.0, 0.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left);
|
||||
a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_NONE_HORZ:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .----
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* ´
|
||||
*
|
||||
*/
|
||||
a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_VERT_HORZ:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .----
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* |
|
||||
* |
|
||||
*/
|
||||
if (m_orig_u)
|
||||
return area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
else
|
||||
return areaTriangle(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d),
|
||||
Dbl2(0.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_HORZ_NONE:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* ----´
|
||||
*
|
||||
*
|
||||
*/
|
||||
a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left);
|
||||
a2 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_BOTH_NONE:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* --.-´
|
||||
* |
|
||||
* |
|
||||
*/
|
||||
a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left);
|
||||
a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_HORZ_HORZ:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .----
|
||||
* .-´
|
||||
* .-´
|
||||
* ----´
|
||||
*
|
||||
*
|
||||
*/
|
||||
return area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_BOTH_HORZ:
|
||||
{
|
||||
/*
|
||||
*
|
||||
* .----
|
||||
* .-´
|
||||
* .-´
|
||||
* --.-´
|
||||
* |
|
||||
* |
|
||||
*/
|
||||
a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_NONE_VERT:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* |
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* ´
|
||||
*
|
||||
*/
|
||||
a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_VERT_VERT:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* |
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* |
|
||||
* |
|
||||
*/
|
||||
return area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_NONE_BOTH:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* .----
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* ´
|
||||
*
|
||||
*/
|
||||
a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_VERT_BOTH:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* .----
|
||||
* .-´
|
||||
* .-´
|
||||
* .-´
|
||||
* |
|
||||
* |
|
||||
*/
|
||||
a1 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_HORZ_VERT:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* |
|
||||
* .-´
|
||||
* .-´
|
||||
* ----´
|
||||
*
|
||||
*
|
||||
*/
|
||||
if (m_orig_u)
|
||||
return area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
else
|
||||
return areaTriangle(Dbl2(1.0, 1.0) + offset, Dbl2(2.0, 1.0) + Dbl2(d),
|
||||
Dbl2(1.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_BOTH_VERT:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* |
|
||||
* .-´
|
||||
* .-´
|
||||
* --.-´
|
||||
* |
|
||||
* |
|
||||
*/
|
||||
a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_HORZ_BOTH:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* .----
|
||||
* .-´
|
||||
* .-´
|
||||
* ----´
|
||||
*
|
||||
*
|
||||
*/
|
||||
a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
a2 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
case EDGESDIAG_BOTH_BOTH:
|
||||
{
|
||||
/*
|
||||
* |
|
||||
* .----
|
||||
* .-´
|
||||
* .-´
|
||||
* --.-´
|
||||
* |
|
||||
* |
|
||||
*/
|
||||
a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
|
||||
a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
|
||||
return (a1 + a2) / Dbl2(2.0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return Dbl2(0.0, 0.0);
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------------*/
|
||||
/* Main Loops */
|
||||
|
||||
void AreaOrtho::areaTex(int offset_index)
|
||||
{
|
||||
double offset = subsample_offsets_ortho[offset_index];
|
||||
int max_dist = m_compat ? MAX_DIST_ORTHO_COMPAT : MAX_DIST_ORTHO;
|
||||
|
||||
for (int pattern = 0; pattern < 16; pattern++) {
|
||||
Int2 e = Int2(max_dist) * (m_compat ? edgesortho_compat : edgesortho)[pattern];
|
||||
for (int left = 0; left < max_dist; left++) {
|
||||
for (int right = 0; right < max_dist; right++) {
|
||||
Dbl2 p = calculate(pattern, left * left, right * right, offset);
|
||||
Int2 coords = e + Int2(left, right);
|
||||
|
||||
putPixel(offset_index, coords, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void AreaDiag::areaTex(int offset_index)
|
||||
{
|
||||
Dbl2 offset = subsample_offsets_diag[offset_index];
|
||||
|
||||
for (int pattern = 0; pattern < 16; pattern++) {
|
||||
Int2 e = Int2(MAX_DIST_DIAG) * edgesdiag[pattern];
|
||||
for (int left = 0; left < MAX_DIST_DIAG; left++) {
|
||||
for (int right = 0; right < MAX_DIST_DIAG; right++) {
|
||||
Dbl2 p = calculate(pattern, left, right, offset);
|
||||
Int2 coords = e + Int2(left, right);
|
||||
|
||||
putPixel(offset_index, coords, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------------*/
|
||||
/* Write File to Specified Location on Disk */
|
||||
|
||||
/* C/C++ source code (arrays of floats) */
|
||||
static void write_double_array(FILE *fp, const double *ptr, int length, const char *array_name, bool quantize)
|
||||
{
|
||||
fprintf(fp, "static const float %s[%d] = {", array_name, length);
|
||||
|
||||
for (int n = 0; n < length; n++) {
|
||||
if (n > 0)
|
||||
fprintf(fp, ",");
|
||||
fprintf(fp, (n % 8 != 0) ? " " : "\n\t");
|
||||
|
||||
if (quantize)
|
||||
fprintf(fp, "%3d / 255.0", (int)(*(ptr++) * 255.0));
|
||||
else
|
||||
fprintf(fp, "%1.8lf", *(ptr++));
|
||||
}
|
||||
|
||||
fprintf(fp, "\n};\n");
|
||||
}
|
||||
|
||||
static void write_csource(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling, bool quantize)
|
||||
{
|
||||
fprintf(fp, "/* This file was generated by smaa_areatex.cpp */\n");
|
||||
|
||||
fprintf(fp, "\n/* Horizontal/Vertical Areas */\n");
|
||||
write_double_array(fp, ortho->getData(),
|
||||
TEX_SIZE_ORTHO * TEX_SIZE_ORTHO * 2 * (subsampling ? SUBSAMPLES_ORTHO : 1),
|
||||
"areatex", quantize);
|
||||
|
||||
fprintf(fp, "\n/* Diagonal Areas */\n");
|
||||
write_double_array(fp, diag->getData(),
|
||||
TEX_SIZE_DIAG * TEX_SIZE_DIAG * 2 * (subsampling ? SUBSAMPLES_DIAG : 1),
|
||||
"areatex_diag", quantize);
|
||||
}
|
||||
|
||||
/* .tga File (RGBA 32bit uncompressed) */
|
||||
static void write_tga(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling)
|
||||
{
|
||||
int subsamples = subsampling ? SUBSAMPLES_ORTHO : 1;
|
||||
unsigned char header[18] = {0, 0,
|
||||
2, /* uncompressed RGB */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
32, /* 32bit */
|
||||
8}; /* 8bit alpha, left to right, bottom to top */
|
||||
|
||||
/* Set width and height */
|
||||
header[12] = (TEX_SIZE_ORTHO + TEX_SIZE_DIAG) & 0xff;
|
||||
header[13] = ((TEX_SIZE_ORTHO + TEX_SIZE_DIAG) >> 8) & 0xff;
|
||||
header[14] = (subsamples * TEX_SIZE_ORTHO) & 0xff;
|
||||
header[15] = ((subsamples * TEX_SIZE_ORTHO) >> 8) & 0xff;
|
||||
|
||||
/* Write .tga header */
|
||||
fwrite(header, sizeof(unsigned char), sizeof(header) / sizeof(unsigned char), fp);
|
||||
|
||||
/* Write pixel data */
|
||||
for (int i = subsamples - 1; i >= 0; i--) {
|
||||
for (int y = TEX_SIZE_ORTHO - 1; y >= 0; y--) {
|
||||
for (int x = 0; x < TEX_SIZE_ORTHO; x++) {
|
||||
Dbl2 p = ortho->getPixel(i, Int2(x, y));
|
||||
fputc(0, fp); /* B */
|
||||
fputc((unsigned char)(p.y * 255.0), fp); /* G */
|
||||
fputc((unsigned char)(p.x * 255.0), fp); /* R */
|
||||
fputc(0, fp); /* A */
|
||||
}
|
||||
|
||||
for (int x = 0; x < TEX_SIZE_DIAG; x++) {
|
||||
if (i < SUBSAMPLES_DIAG) {
|
||||
Dbl2 p = diag->getPixel(i, Int2(x, y));
|
||||
fputc(0, fp); /* B */
|
||||
fputc((unsigned char)(p.y * 255.0), fp); /* G */
|
||||
fputc((unsigned char)(p.x * 255.0), fp); /* R */
|
||||
fputc(0, fp); /* A */
|
||||
}
|
||||
else {
|
||||
fputc(0, fp);
|
||||
fputc(0, fp);
|
||||
fputc(0, fp);
|
||||
fputc(0, fp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* .raw File (R8G8 raw data) */
|
||||
static void write_raw(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling)
|
||||
{
|
||||
int subsamples = subsampling ? SUBSAMPLES_ORTHO : 1;
|
||||
|
||||
/* Write pixel data */
|
||||
for (int i = 0; i < subsamples; i++) {
|
||||
for (int y = 0; y < TEX_SIZE_ORTHO; y++) {
|
||||
for (int x = 0; x < TEX_SIZE_ORTHO; x++) {
|
||||
Dbl2 p = ortho->getPixel(i, Int2(x, y));
|
||||
fputc((unsigned char)(p.x * 255.0), fp); /* R */
|
||||
fputc((unsigned char)(p.y * 255.0), fp); /* G */
|
||||
}
|
||||
|
||||
for (int x = 0; x < TEX_SIZE_DIAG; x++) {
|
||||
if (i < SUBSAMPLES_DIAG) {
|
||||
Dbl2 p = diag->getPixel(i, Int2(x, y));
|
||||
fputc((unsigned char)(p.x * 255.0), fp); /* R */
|
||||
fputc((unsigned char)(p.y * 255.0), fp); /* G */
|
||||
}
|
||||
else {
|
||||
fputc(0, fp);
|
||||
fputc(0, fp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int generate_file(AreaOrtho *ortho, AreaDiag *diag, const char *path, bool subsampling, bool quantize, bool tga, bool raw)
|
||||
{
|
||||
FILE *fp = fopen(path, tga ? "wb" : "w");
|
||||
|
||||
if (!fp) {
|
||||
fprintf(stderr, "Unable to open file: %s\n", path);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// fprintf(stderr, "Generating %s\n", path);
|
||||
|
||||
if (tga)
|
||||
write_tga(ortho, diag, fp, subsampling);
|
||||
else if (raw)
|
||||
write_raw(ortho, diag, fp, subsampling);
|
||||
else
|
||||
write_csource(ortho, diag, fp, subsampling, quantize);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
bool subsampling = false;
|
||||
bool quantize = false;
|
||||
bool tga = false;
|
||||
bool raw = false;
|
||||
bool compat = false;
|
||||
bool numeric = false;
|
||||
bool orig_u = false;
|
||||
bool help = false;
|
||||
char *outfile = NULL;
|
||||
int status = 0;
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
char *ptr = argv[i];
|
||||
if (*ptr++ == '-' && *ptr != '\0') {
|
||||
char c;
|
||||
while ((c = *ptr++) != '\0') {
|
||||
if (c == 's')
|
||||
subsampling = true;
|
||||
else if (c == 'q')
|
||||
quantize = true;
|
||||
else if (c == 't')
|
||||
tga = true;
|
||||
else if (c == 'r')
|
||||
raw = true;
|
||||
else if (c == 'c')
|
||||
compat = true;
|
||||
else if (c == 'n')
|
||||
numeric = true;
|
||||
else if (c == 'u')
|
||||
orig_u = true;
|
||||
else if (c == 'h')
|
||||
help = true;
|
||||
else {
|
||||
fprintf(stderr, "Unknown option: -%c\n", c);
|
||||
status = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (outfile) {
|
||||
fprintf(stderr, "Too much file names: %s, %s\n", outfile, argv[i]);
|
||||
status = 1;
|
||||
}
|
||||
else
|
||||
outfile = argv[i];
|
||||
|
||||
if (status != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (status == 0 && !help && !outfile) {
|
||||
fprintf(stderr, "File name was not specified.\n");
|
||||
status = 1;
|
||||
}
|
||||
|
||||
if (status != 0 || help) {
|
||||
fprintf(stderr, "Usage: %s [OPTION]... OUTFILE\n", argv[0]);
|
||||
fprintf(stderr, "Options:\n");
|
||||
fprintf(stderr, " -s Calculate data for subpixel rendering\n");
|
||||
fprintf(stderr, " -q Quantize data to 256 levels\n");
|
||||
fprintf(stderr, " -t Write TGA image instead of C/C++ source\n");
|
||||
fprintf(stderr, " -r Write R8G8 raw image instead of C/C++ source\n");
|
||||
fprintf(stderr, " -c Generate compatible orthogonal data that subtexture size is 16\n");
|
||||
fprintf(stderr, " -n Numerically calculate diagonal data using brute force sampling\n");
|
||||
fprintf(stderr, " -u Process orthogonal / diagonal U patterns in older ways\n");
|
||||
fprintf(stderr, " -h Print this help and exit\n");
|
||||
fprintf(stderr, "File name OUTFILE usually should have an extension such as .c, .h, or .tga,\n");
|
||||
fprintf(stderr, "except for a special name '-' that means standard output.\n\n");
|
||||
fprintf(stderr, "Example:\n");
|
||||
fprintf(stderr, " Generate TGA file exactly same as AreaTexDX10.tga bundled with the\n");
|
||||
fprintf(stderr, " original implementation:\n\n");
|
||||
fprintf(stderr, " $ smaa_areatex -stcnu AreaTexDX10.tga\n\n");
|
||||
return status;
|
||||
}
|
||||
|
||||
AreaOrtho *ortho = new AreaOrtho(compat, orig_u);
|
||||
AreaDiag *diag = new AreaDiag(numeric, orig_u);
|
||||
|
||||
/* Calculate areatex data */
|
||||
for (int i = 0; i < (subsampling ? SUBSAMPLES_ORTHO : 1); i++)
|
||||
ortho->areaTex(i);
|
||||
|
||||
for (int i = 0; i < (subsampling ? SUBSAMPLES_DIAG : 1); i++)
|
||||
diag->areaTex(i);
|
||||
|
||||
/* Generate .tga, .raw, or C/C++ source file, or write the data to stdout */
|
||||
if (strcmp(outfile, "-") != 0)
|
||||
status = generate_file(ortho, diag, outfile, subsampling, quantize, tga, raw);
|
||||
else if (tga)
|
||||
write_tga(ortho, diag, stdout, subsampling);
|
||||
else if (raw)
|
||||
write_raw(ortho, diag, stdout, subsampling);
|
||||
else
|
||||
write_csource(ortho, diag, stdout, subsampling, quantize);
|
||||
|
||||
delete ortho;
|
||||
delete diag;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/* smaa_areatex.cpp ends here */
|
||||
@@ -584,20 +584,6 @@ if(WITH_COMPOSITOR_CPU)
|
||||
${CMAKE_CURRENT_BINARY_DIR}/operations
|
||||
)
|
||||
|
||||
set(GENSRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/operations)
|
||||
set(GENSRC ${GENSRC_DIR}/COM_SMAAAreaTexture.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${GENSRC}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${GENSRC_DIR}
|
||||
COMMAND "$<TARGET_FILE:smaa_areatex>" ${GENSRC}
|
||||
DEPENDS smaa_areatex
|
||||
)
|
||||
list(APPEND SRC
|
||||
${GENSRC}
|
||||
)
|
||||
unset(GENSRC)
|
||||
unset(GENSRC_DIR)
|
||||
|
||||
if(WITH_OPENIMAGEDENOISE)
|
||||
add_definitions(-DWITH_OPENIMAGEDENOISE)
|
||||
add_definitions(-DOIDN_STATIC_LIB)
|
||||
|
||||
@@ -7,37 +7,41 @@
|
||||
|
||||
namespace blender::compositor {
|
||||
|
||||
/* Blender encodes the threshold in the [0, 1] range, while the SMAA algorithm expects it in
|
||||
* the [0, 0.5] range. */
|
||||
static float get_threshold(const NodeAntiAliasingData *data)
|
||||
{
|
||||
return data->threshold / 2.0f;
|
||||
}
|
||||
|
||||
/* Blender encodes the local contrast adaptation factor in the [0, 1] range, while the SMAA
|
||||
* algorithm expects it in the [0, 10] range. */
|
||||
static float get_local_contrast_adaptation_factor(const NodeAntiAliasingData *data)
|
||||
{
|
||||
return data->contrast_limit * 10.0f;
|
||||
}
|
||||
|
||||
/* Blender encodes the corner rounding factor in the float [0, 1] range, while the SMAA algorithm
|
||||
* expects it in the integer [0, 100] range. */
|
||||
static int get_corner_rounding(const NodeAntiAliasingData *data)
|
||||
{
|
||||
return int(data->corner_rounding * 100.0f);
|
||||
}
|
||||
|
||||
void AntiAliasingNode::convert_to_operations(NodeConverter &converter,
|
||||
const CompositorContext & /*context*/) const
|
||||
{
|
||||
const bNode *node = this->get_bnode();
|
||||
const NodeAntiAliasingData *data = (const NodeAntiAliasingData *)node->storage;
|
||||
|
||||
/* Edge Detection (First Pass) */
|
||||
SMAAEdgeDetectionOperation *operation1 = nullptr;
|
||||
SMAAOperation *operation = new SMAAOperation();
|
||||
operation->set_threshold(get_threshold(data));
|
||||
operation->set_local_contrast_adaptation_factor(get_local_contrast_adaptation_factor(data));
|
||||
operation->set_corner_rounding(get_corner_rounding(data));
|
||||
converter.add_operation(operation);
|
||||
|
||||
operation1 = new SMAAEdgeDetectionOperation();
|
||||
operation1->set_threshold(data->threshold);
|
||||
operation1->set_local_contrast_adaptation_factor(data->contrast_limit);
|
||||
converter.add_operation(operation1);
|
||||
|
||||
converter.map_input_socket(get_input_socket(0), operation1->get_input_socket(0));
|
||||
|
||||
/* Blending Weight Calculation Pixel Shader (Second Pass) */
|
||||
SMAABlendingWeightCalculationOperation *operation2 =
|
||||
new SMAABlendingWeightCalculationOperation();
|
||||
operation2->set_corner_rounding(data->corner_rounding);
|
||||
converter.add_operation(operation2);
|
||||
|
||||
converter.add_link(operation1->get_output_socket(), operation2->get_input_socket(0));
|
||||
|
||||
/* Neighborhood Blending Pixel Shader (Third Pass) */
|
||||
SMAANeighborhoodBlendingOperation *operation3 = new SMAANeighborhoodBlendingOperation();
|
||||
converter.add_operation(operation3);
|
||||
|
||||
converter.map_input_socket(get_input_socket(0), operation3->get_input_socket(0));
|
||||
converter.add_link(operation2->get_output_socket(), operation3->get_input_socket(1));
|
||||
converter.map_output_socket(get_output_socket(0), operation3->get_output_socket());
|
||||
converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
|
||||
converter.map_output_socket(get_output_socket(0), operation->get_output_socket());
|
||||
}
|
||||
|
||||
} // namespace blender::compositor
|
||||
|
||||
@@ -18,28 +18,13 @@ void CornerPinNode::convert_to_operations(NodeConverter &converter,
|
||||
PlaneCornerPinMaskOperation *plane_mask_operation = new PlaneCornerPinMaskOperation();
|
||||
converter.add_operation(plane_mask_operation);
|
||||
|
||||
SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
|
||||
converter.add_operation(smaa_edge_detection);
|
||||
SMAAOperation *smaa_operation = new SMAAOperation();
|
||||
converter.add_operation(smaa_operation);
|
||||
|
||||
converter.add_link(plane_mask_operation->get_output_socket(),
|
||||
smaa_edge_detection->get_input_socket(0));
|
||||
smaa_operation->get_input_socket(0));
|
||||
|
||||
SMAABlendingWeightCalculationOperation *smaa_blending_weights =
|
||||
new SMAABlendingWeightCalculationOperation();
|
||||
converter.add_operation(smaa_blending_weights);
|
||||
|
||||
converter.add_link(smaa_edge_detection->get_output_socket(),
|
||||
smaa_blending_weights->get_input_socket(0));
|
||||
|
||||
SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation();
|
||||
converter.add_operation(smaa_neighborhood);
|
||||
|
||||
converter.add_link(plane_mask_operation->get_output_socket(),
|
||||
smaa_neighborhood->get_input_socket(0));
|
||||
converter.add_link(smaa_blending_weights->get_output_socket(),
|
||||
smaa_neighborhood->get_input_socket(1));
|
||||
|
||||
converter.map_output_socket(this->get_output_socket(1), smaa_neighborhood->get_output_socket());
|
||||
converter.map_output_socket(this->get_output_socket(1), smaa_operation->get_output_socket());
|
||||
|
||||
PlaneCornerPinWarpImageOperation *warp_image_operation = new PlaneCornerPinWarpImageOperation();
|
||||
converter.add_operation(warp_image_operation);
|
||||
@@ -62,7 +47,7 @@ void CornerPinNode::convert_to_operations(NodeConverter &converter,
|
||||
converter.add_operation(set_alpha_operation);
|
||||
converter.add_link(warp_image_operation->get_output_socket(),
|
||||
set_alpha_operation->get_input_socket(0));
|
||||
converter.add_link(smaa_neighborhood->get_output_socket(),
|
||||
converter.add_link(smaa_operation->get_output_socket(),
|
||||
set_alpha_operation->get_input_socket(1));
|
||||
converter.map_output_socket(this->get_output_socket(0),
|
||||
set_alpha_operation->get_output_socket());
|
||||
|
||||
@@ -37,26 +37,10 @@ void DilateErodeNode::convert_to_operations(NodeConverter &converter,
|
||||
converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
|
||||
|
||||
if (editor_node->custom3 < 2.0f) {
|
||||
SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
|
||||
converter.add_operation(smaa_edge_detection);
|
||||
|
||||
converter.add_link(operation->get_output_socket(), smaa_edge_detection->get_input_socket(0));
|
||||
|
||||
SMAABlendingWeightCalculationOperation *smaa_blending_weights =
|
||||
new SMAABlendingWeightCalculationOperation();
|
||||
converter.add_operation(smaa_blending_weights);
|
||||
|
||||
converter.add_link(smaa_edge_detection->get_output_socket(),
|
||||
smaa_blending_weights->get_input_socket(0));
|
||||
|
||||
SMAANeighborhoodBlendingOperation *smaa_neighborhood =
|
||||
new SMAANeighborhoodBlendingOperation();
|
||||
converter.add_operation(smaa_neighborhood);
|
||||
|
||||
converter.add_link(operation->get_output_socket(), smaa_neighborhood->get_input_socket(0));
|
||||
converter.add_link(smaa_blending_weights->get_output_socket(),
|
||||
smaa_neighborhood->get_input_socket(1));
|
||||
converter.map_output_socket(get_output_socket(0), smaa_neighborhood->get_output_socket());
|
||||
SMAAOperation *smaa_operation = new SMAAOperation();
|
||||
converter.add_operation(smaa_operation);
|
||||
converter.add_link(operation->get_output_socket(), smaa_operation->get_input_socket(0));
|
||||
converter.map_output_socket(get_output_socket(0), smaa_operation->get_output_socket());
|
||||
}
|
||||
else {
|
||||
converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0));
|
||||
|
||||
@@ -27,27 +27,10 @@ void IDMaskNode::convert_to_operations(NodeConverter &converter,
|
||||
converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0));
|
||||
}
|
||||
else {
|
||||
SMAAEdgeDetectionOperation *operation1 = nullptr;
|
||||
|
||||
operation1 = new SMAAEdgeDetectionOperation();
|
||||
converter.add_operation(operation1);
|
||||
|
||||
converter.add_link(operation->get_output_socket(0), operation1->get_input_socket(0));
|
||||
|
||||
/* Blending Weight Calculation Pixel Shader (Second Pass). */
|
||||
SMAABlendingWeightCalculationOperation *operation2 =
|
||||
new SMAABlendingWeightCalculationOperation();
|
||||
converter.add_operation(operation2);
|
||||
|
||||
converter.add_link(operation1->get_output_socket(), operation2->get_input_socket(0));
|
||||
|
||||
/* Neighborhood Blending Pixel Shader (Third Pass). */
|
||||
SMAANeighborhoodBlendingOperation *operation3 = new SMAANeighborhoodBlendingOperation();
|
||||
converter.add_operation(operation3);
|
||||
|
||||
converter.add_link(operation->get_output_socket(0), operation3->get_input_socket(0));
|
||||
converter.add_link(operation2->get_output_socket(), operation3->get_input_socket(1));
|
||||
converter.map_output_socket(get_output_socket(0), operation3->get_output_socket());
|
||||
SMAAOperation *smaa_operation = new SMAAOperation();
|
||||
converter.add_operation(smaa_operation);
|
||||
converter.add_link(operation->get_output_socket(0), smaa_operation->get_input_socket(0));
|
||||
converter.map_output_socket(get_output_socket(0), smaa_operation->get_output_socket());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -35,28 +35,13 @@ void PlaneTrackDeformNode::convert_to_operations(NodeConverter &converter,
|
||||
}
|
||||
converter.add_operation(plane_mask_operation);
|
||||
|
||||
SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
|
||||
converter.add_operation(smaa_edge_detection);
|
||||
SMAAOperation *smaa_operation = new SMAAOperation();
|
||||
converter.add_operation(smaa_operation);
|
||||
|
||||
converter.add_link(plane_mask_operation->get_output_socket(),
|
||||
smaa_edge_detection->get_input_socket(0));
|
||||
smaa_operation->get_input_socket(0));
|
||||
|
||||
SMAABlendingWeightCalculationOperation *smaa_blending_weights =
|
||||
new SMAABlendingWeightCalculationOperation();
|
||||
converter.add_operation(smaa_blending_weights);
|
||||
|
||||
converter.add_link(smaa_edge_detection->get_output_socket(),
|
||||
smaa_blending_weights->get_input_socket(0));
|
||||
|
||||
SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation();
|
||||
converter.add_operation(smaa_neighborhood);
|
||||
|
||||
converter.add_link(plane_mask_operation->get_output_socket(),
|
||||
smaa_neighborhood->get_input_socket(0));
|
||||
converter.add_link(smaa_blending_weights->get_output_socket(),
|
||||
smaa_neighborhood->get_input_socket(1));
|
||||
|
||||
converter.map_output_socket(this->get_output_socket(1), smaa_neighborhood->get_output_socket());
|
||||
converter.map_output_socket(this->get_output_socket(1), smaa_operation->get_output_socket());
|
||||
|
||||
PlaneTrackWarpImageOperation *warp_image_operation = new PlaneTrackWarpImageOperation();
|
||||
warp_image_operation->set_movie_clip(clip);
|
||||
@@ -75,7 +60,7 @@ void PlaneTrackDeformNode::convert_to_operations(NodeConverter &converter,
|
||||
converter.add_operation(set_alpha_operation);
|
||||
converter.add_link(warp_image_operation->get_output_socket(),
|
||||
set_alpha_operation->get_input_socket(0));
|
||||
converter.add_link(smaa_neighborhood->get_output_socket(),
|
||||
converter.add_link(smaa_operation->get_output_socket(),
|
||||
set_alpha_operation->get_input_socket(1));
|
||||
converter.map_output_socket(this->get_output_socket(0),
|
||||
set_alpha_operation->get_output_socket());
|
||||
|
||||
@@ -54,25 +54,10 @@ void ZCombineNode::convert_to_operations(NodeConverter &converter,
|
||||
converter.map_input_socket(get_input_socket(3), maskoperation->get_input_socket(1));
|
||||
|
||||
/* Step 2 anti alias mask bit of an expensive operation, but does the trick. */
|
||||
SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
|
||||
converter.add_operation(smaa_edge_detection);
|
||||
SMAAOperation *smaa_operation = new SMAAOperation();
|
||||
converter.add_operation(smaa_operation);
|
||||
|
||||
converter.add_link(maskoperation->get_output_socket(),
|
||||
smaa_edge_detection->get_input_socket(0));
|
||||
|
||||
SMAABlendingWeightCalculationOperation *smaa_blending_weights =
|
||||
new SMAABlendingWeightCalculationOperation();
|
||||
converter.add_operation(smaa_blending_weights);
|
||||
|
||||
converter.add_link(smaa_edge_detection->get_output_socket(),
|
||||
smaa_blending_weights->get_input_socket(0));
|
||||
|
||||
SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation();
|
||||
converter.add_operation(smaa_neighborhood);
|
||||
|
||||
converter.add_link(maskoperation->get_output_socket(), smaa_neighborhood->get_input_socket(0));
|
||||
converter.add_link(smaa_blending_weights->get_output_socket(),
|
||||
smaa_neighborhood->get_input_socket(1));
|
||||
converter.add_link(maskoperation->get_output_socket(), smaa_operation->get_input_socket(0));
|
||||
|
||||
/* use mask to blend between the input colors. */
|
||||
ZCombineMaskOperation *zcombineoperation = this->get_bnode()->custom1 ?
|
||||
@@ -80,7 +65,7 @@ void ZCombineNode::convert_to_operations(NodeConverter &converter,
|
||||
new ZCombineMaskOperation();
|
||||
converter.add_operation(zcombineoperation);
|
||||
|
||||
converter.add_link(smaa_neighborhood->get_output_socket(),
|
||||
converter.add_link(smaa_operation->get_output_socket(),
|
||||
zcombineoperation->get_input_socket(0));
|
||||
converter.map_input_socket(get_input_socket(0), zcombineoperation->get_input_socket(1));
|
||||
converter.map_input_socket(get_input_socket(2), zcombineoperation->get_input_socket(2));
|
||||
|
||||
@@ -1,805 +1,1514 @@
|
||||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
/* SPDX-FileCopyrightText: 2013 Jorge Jimenez <jorge@iryoku.com>
|
||||
* SPDX-FileCopyrightText: 2013 Jose I. Echevarria <joseignacioechevarria@gmail.com>
|
||||
* SPDX-FileCopyrightText: 2013 Belen Masia <bmasia@unizar.es>
|
||||
* SPDX-FileCopyrightText: 2013 Fernando Navarro <fernandn@microsoft.com>
|
||||
* SPDX-FileCopyrightText: 2013 Diego Gutierrez <diegog@unizar.es>
|
||||
* SPDX-FileCopyrightText: 2019-2023 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
* SPDX-License-Identifier: MIT AND GPL-2.0-or-later */
|
||||
|
||||
#include "COM_SMAAOperation.h"
|
||||
#include "BKE_node.hh"
|
||||
#include "COM_SMAAAreaTexture.h"
|
||||
#include "BLI_math_vector.h"
|
||||
#include "BLI_math_vector.hh"
|
||||
#include "BLI_smaa_textures.h"
|
||||
#include "BLI_span.hh"
|
||||
#include "BLI_task.hh"
|
||||
|
||||
#include "IMB_colormanagement.hh"
|
||||
|
||||
#include "COM_MemoryBuffer.h"
|
||||
#include "COM_SMAAOperation.h"
|
||||
|
||||
/**
|
||||
* _______ ___ ___ ___ ___
|
||||
* / || \/ | / \ / \
|
||||
* | (---- | \ / | / ^ \ / ^ \
|
||||
* \ \ | |\/| | / /_\ \ / /_\ \
|
||||
* ----) | | | | | / _____ \ / _____ \
|
||||
* |_______/ |__| |__| /__/ \__\ /__/ \__\
|
||||
*
|
||||
* E N H A N C E D
|
||||
* S U B P I X E L M O R P H O L O G I C A L A N T I A L I A S I N G
|
||||
*
|
||||
* http://www.iryoku.com/smaa/
|
||||
*
|
||||
* Hi, welcome aboard!
|
||||
*
|
||||
* Here you'll find instructions to get the shader up and running as fast as
|
||||
* possible.
|
||||
*
|
||||
* IMPORTANTE NOTICE: when updating, remember to update both this file and the
|
||||
* precomputed textures! They may change from version to version.
|
||||
*
|
||||
* The shader has three passes, chained together as follows:
|
||||
*
|
||||
* |input|------------------<2D>
|
||||
* v |
|
||||
* [ SMAA*EdgeDetection ] |
|
||||
* v |
|
||||
* |edgesTex| |
|
||||
* v |
|
||||
* [ SMAABlendingWeightCalculation ] |
|
||||
* v |
|
||||
* |blendTex| |
|
||||
* v |
|
||||
* [ SMAANeighborhoodBlending ] <------<2D>
|
||||
* v
|
||||
* |output|
|
||||
*
|
||||
* Note that each [pass] has its own vertex and pixel shader. Remember to use
|
||||
* oversized triangles instead of quads to avoid overshading along the
|
||||
* diagonal.
|
||||
*
|
||||
* You've three edge detection methods to choose from: luma, color or depth.
|
||||
* They represent different quality/performance and anti-aliasing/sharpness
|
||||
* tradeoffs, so our recommendation is for you to choose the one that best
|
||||
* suits your particular scenario:
|
||||
*
|
||||
* - Depth edge detection is usually the fastest but it may miss some edges.
|
||||
*
|
||||
* - Luma edge detection is usually more expensive than depth edge detection,
|
||||
* but catches visible edges that depth edge detection can miss.
|
||||
*
|
||||
* - Color edge detection is usually the most expensive one but catches
|
||||
* chroma-only edges.
|
||||
*
|
||||
* For quickstarters: just use luma edge detection.
|
||||
*
|
||||
* The general advice is to not rush the integration process and ensure each
|
||||
* step is done correctly (don't try to integrate SMAA T2x with predicated edge
|
||||
* detection from the start!). Ok then, let's go!
|
||||
*
|
||||
* 1. The first step is to create two RGBA temporal render targets for holding
|
||||
* |edgesTex| and |blendTex|.
|
||||
*
|
||||
* In DX10 or DX11, you can use a RG render target for the edges texture.
|
||||
* In the case of NVIDIA GPUs, using RG render targets seems to actually be
|
||||
* slower.
|
||||
*
|
||||
* On the Xbox 360, you can use the same render target for resolving both
|
||||
* |edgesTex| and |blendTex|, as they aren't needed simultaneously.
|
||||
*
|
||||
* 2. Both temporal render targets |edgesTex| and |blendTex| must be cleared
|
||||
* each frame. Do not forget to clear the alpha channel!
|
||||
*
|
||||
* 3. The next step is loading the two supporting precalculated textures,
|
||||
* 'areaTex' and 'searchTex'. You'll find them in the 'Textures' folder as
|
||||
* C++ headers, and also as regular DDS files. They'll be needed for the
|
||||
* 'SMAABlendingWeightCalculation' pass.
|
||||
*
|
||||
* If you use the C++ headers, be sure to load them in the format specified
|
||||
* inside of them.
|
||||
*
|
||||
* You can also compress 'areaTex' and 'searchTex' using BC5 and BC4
|
||||
* respectively, if you have that option in your content processor pipeline.
|
||||
* When compressing then, you get a non-perceptible quality decrease, and a
|
||||
* marginal performance increase.
|
||||
*
|
||||
* 4. All samplers must be set to linear filtering and clamp.
|
||||
*
|
||||
* After you get the technique working, remember that 64-bit inputs have
|
||||
* half-rate linear filtering on GCN.
|
||||
*
|
||||
* If SMAA is applied to 64-bit color buffers, switching to point filtering
|
||||
* when accessing them will increase the performance. Search for
|
||||
* 'SMAASamplePoint' to see which textures may benefit from point
|
||||
* filtering, and where (which is basically the color input in the edge
|
||||
* detection and resolve passes).
|
||||
*
|
||||
* 5. All texture reads and buffer writes must be non-sRGB, with the exception
|
||||
* of the input read and the output write in
|
||||
* 'SMAANeighborhoodBlending' (and only in this pass!). If sRGB reads in
|
||||
* this last pass are not possible, the technique will work anyway, but
|
||||
* will perform antialiasing in gamma space.
|
||||
*
|
||||
* IMPORTANT: for best results the input read for the color/luma edge
|
||||
* detection should *NOT* be sRGB.
|
||||
*
|
||||
* 6. Before including SMAA.h you'll have to setup the render target metrics,
|
||||
* the target and any optional configuration defines. Optionally you can
|
||||
* use a preset.
|
||||
*
|
||||
* You have the following targets available:
|
||||
* SMAA_HLSL_3
|
||||
* SMAA_HLSL_4
|
||||
* SMAA_HLSL_4_1
|
||||
* SMAA_GLSL_3 *
|
||||
* SMAA_GLSL_4 *
|
||||
*
|
||||
* * (See SMAA_INCLUDE_VS and SMAA_INCLUDE_PS below).
|
||||
*
|
||||
* And four presets:
|
||||
* SMAA_PRESET_LOW (%60 of the quality)
|
||||
* SMAA_PRESET_MEDIUM (%80 of the quality)
|
||||
* SMAA_PRESET_HIGH (%95 of the quality)
|
||||
* SMAA_PRESET_ULTRA (%99 of the quality)
|
||||
*
|
||||
* For example:
|
||||
* #define SMAA_RT_METRICS float4(1.0 / 1280.0, 1.0 / 720.0, 1280.0, 720.0)
|
||||
* #define SMAA_HLSL_4
|
||||
* #define SMAA_PRESET_HIGH
|
||||
* #include "SMAA.h"
|
||||
*
|
||||
* Note that SMAA_RT_METRICS doesn't need to be a macro, it can be a
|
||||
* uniform variable. The code is designed to minimize the impact of not
|
||||
* using a constant value, but it is still better to hardcode it.
|
||||
*
|
||||
* Depending on how you encoded 'areaTex' and 'searchTex', you may have to
|
||||
* add (and customize) the following defines before including SMAA.h:
|
||||
* #define SMAA_AREATEX_SELECT(sample) sample.rg
|
||||
* #define SMAA_SEARCHTEX_SELECT(sample) sample.r
|
||||
*
|
||||
* If your engine is already using porting macros, you can define
|
||||
* SMAA_CUSTOM_SL, and define the porting functions by yourself.
|
||||
*
|
||||
* 7. Then, you'll have to setup the passes as indicated in the scheme above.
|
||||
* You can take a look into SMAA.fx, to see how we did it for our demo.
|
||||
* Checkout the function wrappers, you may want to copy-paste them!
|
||||
*
|
||||
* 8. It's recommended to validate the produced |edgesTex| and |blendTex|.
|
||||
* You can use a screenshot from your engine to compare the |edgesTex|
|
||||
* and |blendTex| produced inside of the engine with the results obtained
|
||||
* with the reference demo.
|
||||
*
|
||||
* 9. After you get the last pass to work, it's time to optimize. You'll have
|
||||
* to initialize a stencil buffer in the first pass (discard is already in
|
||||
* the code), then mask execution by using it the second pass. The last
|
||||
* pass should be executed in all pixels.
|
||||
*
|
||||
*
|
||||
* After this point you can choose to enable predicated thresholding,
|
||||
* temporal supersampling and motion blur integration:
|
||||
*
|
||||
* a) If you want to use predicated thresholding, take a look into
|
||||
* SMAA_PREDICATION; you'll need to pass an extra texture in the edge
|
||||
* detection pass.
|
||||
*
|
||||
* b) If you want to enable temporal supersampling (SMAA T2x):
|
||||
*
|
||||
* 1. The first step is to render using subpixel jitters. I won't go into
|
||||
* detail, but it's as simple as moving each vertex position in the
|
||||
* vertex shader, you can check how we do it in our DX10 demo.
|
||||
*
|
||||
* 2. Then, you must setup the temporal resolve. You may want to take a look
|
||||
* into SMAAResolve for resolving 2x modes. After you get it working, you'll
|
||||
* probably see ghosting everywhere. But fear not, you can enable the
|
||||
* CryENGINE temporal reprojection by setting the SMAA_REPROJECTION macro.
|
||||
* Check out SMAA_DECODE_VELOCITY if your velocity buffer is encoded.
|
||||
*
|
||||
* 3. The next step is to apply SMAA to each subpixel jittered frame, just as
|
||||
* done for 1x.
|
||||
*
|
||||
* 4. At this point you should already have something usable, but for best
|
||||
* results the proper area textures must be set depending on current jitter.
|
||||
* For this, the parameter 'subsampleIndices' of
|
||||
* 'SMAABlendingWeightCalculationPS' must be set as follows, for our T2x
|
||||
* mode:
|
||||
*
|
||||
* @SUBSAMPLE_INDICES
|
||||
*
|
||||
* | S# | Camera Jitter | subsampleIndices |
|
||||
* +----+------------------+---------------------+
|
||||
* | 0 | ( 0.25, -0.25) | float4(1, 1, 1, 0) |
|
||||
* | 1 | (-0.25, 0.25) | float4(2, 2, 2, 0) |
|
||||
*
|
||||
* These jitter positions assume a bottom-to-top y axis. S# stands for the
|
||||
* sample number.
|
||||
*
|
||||
* More information about temporal supersampling here:
|
||||
* http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
|
||||
*
|
||||
* c) If you want to enable spatial multisampling (SMAA S2x):
|
||||
*
|
||||
* 1. The scene must be rendered using MSAA 2x. The MSAA 2x buffer must be
|
||||
* created with:
|
||||
* - DX10: see below (*)
|
||||
* - DX10.1: D3D10_STANDARD_MULTISAMPLE_PATTERN or
|
||||
* - DX11: D3D11_STANDARD_MULTISAMPLE_PATTERN
|
||||
*
|
||||
* This allows to ensure that the subsample order matches the table in
|
||||
* @SUBSAMPLE_INDICES.
|
||||
*
|
||||
* (*) In the case of DX10, we refer the reader to:
|
||||
* - SMAA::detectMSAAOrder and
|
||||
* - SMAA::msaaReorder
|
||||
*
|
||||
* These functions allow matching the standard multisample patterns by
|
||||
* detecting the subsample order for a specific GPU, and reordering
|
||||
* them appropriately.
|
||||
*
|
||||
* 2. A shader must be run to output each subsample into a separate buffer
|
||||
* (DX10 is required). You can use SMAASeparate for this purpose, or just do
|
||||
* it in an existing pass (for example, in the tone mapping pass, which has
|
||||
* the advantage of feeding tone mapped subsamples to SMAA, which will yield
|
||||
* better results).
|
||||
*
|
||||
* 3. The full SMAA 1x pipeline must be run for each separated buffer, storing
|
||||
* the results in the final buffer. The second run should alpha blend with
|
||||
* the existing final buffer using a blending factor of 0.5.
|
||||
* 'subsampleIndices' must be adjusted as in the SMAA T2x case (see point
|
||||
* b).
|
||||
*
|
||||
* d) If you want to enable temporal supersampling on top of SMAA S2x
|
||||
* (which actually is SMAA 4x):
|
||||
*
|
||||
* 1. SMAA 4x consists on temporally jittering SMAA S2x, so the first step is
|
||||
* to calculate SMAA S2x for current frame. In this case, 'subsampleIndices'
|
||||
* must be set as follows:
|
||||
*
|
||||
* | F# | S# | Camera Jitter | Net Jitter | subsampleIndices |
|
||||
* +----+----+--------------------+-------------------+----------------------+
|
||||
* | 0 | 0 | ( 0.125, 0.125) | ( 0.375, -0.125) | float4(5, 3, 1, 3) |
|
||||
* | 0 | 1 | ( 0.125, 0.125) | (-0.125, 0.375) | float4(4, 6, 2, 3) |
|
||||
* +----+----+--------------------+-------------------+----------------------+
|
||||
* | 1 | 2 | (-0.125, -0.125) | ( 0.125, -0.375) | float4(3, 5, 1, 4) |
|
||||
* | 1 | 3 | (-0.125, -0.125) | (-0.375, 0.125) | float4(6, 4, 2, 4) |
|
||||
*
|
||||
* These jitter positions assume a bottom-to-top y axis. F# stands for the
|
||||
* frame number. S# stands for the sample number.
|
||||
*
|
||||
* 2. After calculating SMAA S2x for current frame (with the new subsample
|
||||
* indices), previous frame must be reprojected as in SMAA T2x mode (see
|
||||
* point b).
|
||||
*
|
||||
* e) If motion blur is used, you may want to do the edge detection pass
|
||||
* together with motion blur. This has two advantages:
|
||||
*
|
||||
* 1. Pixels under heavy motion can be omitted from the edge detection process.
|
||||
* For these pixels we can just store "no edge", as motion blur will take
|
||||
* care of them.
|
||||
* 2. The center pixel tap is reused.
|
||||
*
|
||||
* Note that in this case depth testing should be used instead of stenciling,
|
||||
* as we have to write all the pixels in the motion blur pass.
|
||||
*
|
||||
* That's it!
|
||||
*/
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Blender's Defines */
|
||||
|
||||
#define SMAA_CUSTOM_SL
|
||||
#define SMAA_AREATEX_SELECT(sample) sample.xy()
|
||||
#define SMAA_SEARCHTEX_SELECT(sample) sample.x
|
||||
#define SMAATexture2D(tex) const MemoryBuffer *tex
|
||||
#define SMAATexturePass2D(tex) tex
|
||||
#define SMAASampleLevelZero(tex, coord) tex->texture_bilinear_extend(coord)
|
||||
#define SMAASampleLevelZeroPoint(tex, coord) tex->texture_bilinear_extend(coord)
|
||||
#define SMAASampleLevelZeroOffset(tex, coord, offset, size) \
|
||||
tex->texture_bilinear_extend(coord + float2(offset) / float2(size))
|
||||
#define SMAASample(tex, coord) tex->texture_bilinear_extend(coord)
|
||||
#define SMAASamplePoint(tex, coord) tex->texture_nearest_extend(coord)
|
||||
#define SMAASamplePointOffset(tex, coord, offset, size) \
|
||||
tex->texture_nearest_extend(coord + float2(offset) / float2(size))
|
||||
#define SMAASampleOffset(tex, coord, offset, size) \
|
||||
tex->texture_bilinear_extend(coord + float2(offset) / float2(size))
|
||||
#define SMAA_FLATTEN
|
||||
#define SMAA_BRANCH
|
||||
#define lerp(a, b, t) math::interpolate(a, b, t)
|
||||
#define saturate(a) math::clamp(a, 0.0f, 1.0f)
|
||||
#define mad(a, b, c) (a * b + c)
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* SMAA Presets */
|
||||
|
||||
/**
|
||||
* Note that if you use one of these presets, the following configuration
|
||||
* macros will be ignored if set in the "Configurable Defines" section.
|
||||
*/
|
||||
|
||||
#if defined(SMAA_PRESET_LOW)
|
||||
# define SMAA_THRESHOLD 0.15f
|
||||
# define SMAA_MAX_SEARCH_STEPS 4
|
||||
# define SMAA_DISABLE_DIAG_DETECTION
|
||||
# define SMAA_DISABLE_CORNER_DETECTION
|
||||
#elif defined(SMAA_PRESET_MEDIUM)
|
||||
# define SMAA_THRESHOLD 0.1f
|
||||
# define SMAA_MAX_SEARCH_STEPS 8
|
||||
# define SMAA_DISABLE_DIAG_DETECTION
|
||||
# define SMAA_DISABLE_CORNER_DETECTION
|
||||
#elif defined(SMAA_PRESET_HIGH)
|
||||
# define SMAA_THRESHOLD 0.1f
|
||||
# define SMAA_MAX_SEARCH_STEPS 16
|
||||
# define SMAA_MAX_SEARCH_STEPS_DIAG 8
|
||||
# define SMAA_CORNER_ROUNDING 25
|
||||
#elif defined(SMAA_PRESET_ULTRA)
|
||||
# define SMAA_THRESHOLD 0.05f
|
||||
# define SMAA_MAX_SEARCH_STEPS 32
|
||||
# define SMAA_MAX_SEARCH_STEPS_DIAG 16
|
||||
# define SMAA_CORNER_ROUNDING 25
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Configurable Defines */
|
||||
|
||||
/**
|
||||
* SMAA_THRESHOLD specifies the threshold or sensitivity to edges.
|
||||
* Lowering this value you will be able to detect more edges at the expense of
|
||||
* performance.
|
||||
*
|
||||
* Range: [0, 0.5]
|
||||
* 0.1 is a reasonable value, and allows to catch most visible edges.
|
||||
* 0.05 is a rather overkill value, that allows to catch 'em all.
|
||||
*
|
||||
* If temporal supersampling is used, 0.2 could be a reasonable value, as low
|
||||
* contrast edges are properly filtered by just 2x.
|
||||
*/
|
||||
#ifndef SMAA_THRESHOLD
|
||||
# define SMAA_THRESHOLD 0.1f
|
||||
#endif
|
||||
|
||||
/**
|
||||
* SMAA_DEPTH_THRESHOLD specifies the threshold for depth edge detection.
|
||||
*
|
||||
* Range: depends on the depth range of the scene.
|
||||
*/
|
||||
#ifndef SMAA_DEPTH_THRESHOLD
|
||||
# define SMAA_DEPTH_THRESHOLD (0.1f * SMAA_THRESHOLD)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* SMAA_MAX_SEARCH_STEPS specifies the maximum steps performed in the
|
||||
* horizontal/vertical pattern searches, at each side of the pixel.
|
||||
*
|
||||
* In number of pixels, it's actually the double. So the maximum line length
|
||||
* perfectly handled by, for example 16, is 64 (by perfectly, we meant that
|
||||
* longer lines won't look as good, but still antialiased).
|
||||
*
|
||||
* Range: [0, 112]
|
||||
*/
|
||||
#ifndef SMAA_MAX_SEARCH_STEPS
|
||||
# define SMAA_MAX_SEARCH_STEPS 16
|
||||
#endif
|
||||
|
||||
/**
|
||||
* SMAA_MAX_SEARCH_STEPS_DIAG specifies the maximum steps performed in the
|
||||
* diagonal pattern searches, at each side of the pixel. In this case we jump
|
||||
* one pixel at time, instead of two.
|
||||
*
|
||||
* Range: [0, 20]
|
||||
*
|
||||
* On high-end machines it is cheap (between a 0.8x and 0.9x slower for 16
|
||||
* steps), but it can have a significant impact on older machines.
|
||||
*
|
||||
* Define SMAA_DISABLE_DIAG_DETECTION to disable diagonal processing.
|
||||
*/
|
||||
#ifndef SMAA_MAX_SEARCH_STEPS_DIAG
|
||||
# define SMAA_MAX_SEARCH_STEPS_DIAG 8
|
||||
#endif
|
||||
|
||||
/**
|
||||
* SMAA_CORNER_ROUNDING specifies how much sharp corners will be rounded.
|
||||
*
|
||||
* Range: [0, 100]
|
||||
*
|
||||
* Define SMAA_DISABLE_CORNER_DETECTION to disable corner processing.
|
||||
*/
|
||||
#ifndef SMAA_CORNER_ROUNDING
|
||||
# define SMAA_CORNER_ROUNDING 25
|
||||
#endif
|
||||
|
||||
/**
|
||||
* If there is an neighbor edge that has SMAA_LOCAL_CONTRAST_FACTOR times
|
||||
* bigger contrast than current edge, current edge will be discarded.
|
||||
*
|
||||
* This allows to eliminate spurious crossing edges, and is based on the fact
|
||||
* that, if there is too much contrast in a direction, that will hide
|
||||
* perceptually contrast in the other neighbors.
|
||||
*/
|
||||
#ifndef SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR
|
||||
# define SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR 2.0f
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Predicated thresholding allows to better preserve texture details and to
|
||||
* improve performance, by decreasing the number of detected edges using an
|
||||
* additional buffer like the light accumulation buffer, object ids or even the
|
||||
* depth buffer (the depth buffer usage may be limited to indoor or short range
|
||||
* scenes).
|
||||
*
|
||||
* It locally decreases the luma or color threshold if an edge is found in an
|
||||
* additional buffer (so the global threshold can be higher).
|
||||
*
|
||||
* This method was developed by Playstation EDGE MLAA team, and used in
|
||||
* Killzone 3, by using the light accumulation buffer. More information here:
|
||||
* http://iryoku.com/aacourse/downloads/06-MLAA-on-PS3.pptx
|
||||
*/
|
||||
#ifndef SMAA_PREDICATION
|
||||
# define SMAA_PREDICATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Threshold to be used in the additional predication buffer.
|
||||
*
|
||||
* Range: depends on the input, so you'll have to find the magic number that
|
||||
* works for you.
|
||||
*/
|
||||
#ifndef SMAA_PREDICATION_THRESHOLD
|
||||
# define SMAA_PREDICATION_THRESHOLD 0.01f
|
||||
#endif
|
||||
|
||||
/**
|
||||
* How much to scale the global threshold used for luma or color edge
|
||||
* detection when using predication.
|
||||
*
|
||||
* Range: [1, 5]
|
||||
*/
|
||||
#ifndef SMAA_PREDICATION_SCALE
|
||||
# define SMAA_PREDICATION_SCALE 2.0f
|
||||
#endif
|
||||
|
||||
/**
|
||||
* How much to locally decrease the threshold.
|
||||
*
|
||||
* Range: [0, 1]
|
||||
*/
|
||||
#ifndef SMAA_PREDICATION_STRENGTH
|
||||
# define SMAA_PREDICATION_STRENGTH 0.4f
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Temporal reprojection allows to remove ghosting artifacts when using
|
||||
* temporal supersampling. We use the CryEngine 3 method which also introduces
|
||||
* velocity weighting. This feature is of extreme importance for totally
|
||||
* removing ghosting. More information here:
|
||||
* http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
|
||||
*
|
||||
* Note that you'll need to setup a velocity buffer for enabling reprojection.
|
||||
* For static geometry, saving the previous depth buffer is a viable
|
||||
* alternative.
|
||||
*/
|
||||
#ifndef SMAA_REPROJECTION
|
||||
# define SMAA_REPROJECTION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* SMAA_REPROJECTION_WEIGHT_SCALE controls the velocity weighting. It allows to
|
||||
* remove ghosting trails behind the moving object, which are not removed by
|
||||
* just using reprojection. Using low values will exhibit ghosting, while using
|
||||
* high values will disable temporal supersampling under motion.
|
||||
*
|
||||
* Behind the scenes, velocity weighting removes temporal supersampling when
|
||||
* the velocity of the subsamples differs (meaning they are different objects).
|
||||
*
|
||||
* Range: [0, 80]
|
||||
*/
|
||||
#ifndef SMAA_REPROJECTION_WEIGHT_SCALE
|
||||
# define SMAA_REPROJECTION_WEIGHT_SCALE 30.0f
|
||||
#endif
|
||||
|
||||
/**
|
||||
* On some compilers, discard cannot be used in vertex shaders. Thus, they need
|
||||
* to be compiled separately.
|
||||
*/
|
||||
#ifndef SMAA_INCLUDE_VS
|
||||
# define SMAA_INCLUDE_VS 1
|
||||
#endif
|
||||
#ifndef SMAA_INCLUDE_PS
|
||||
# define SMAA_INCLUDE_PS 1
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Texture Access Defines */
|
||||
|
||||
#ifndef SMAA_AREATEX_SELECT
|
||||
# if defined(SMAA_HLSL_3)
|
||||
# define SMAA_AREATEX_SELECT(sample) sample.ra
|
||||
# else
|
||||
# define SMAA_AREATEX_SELECT(sample) sample.rg
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef SMAA_SEARCHTEX_SELECT
|
||||
# define SMAA_SEARCHTEX_SELECT(sample) sample.r
|
||||
#endif
|
||||
|
||||
#ifndef SMAA_DECODE_VELOCITY
|
||||
# define SMAA_DECODE_VELOCITY(sample) sample.rg
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Non-Configurable Defines */
|
||||
|
||||
#define SMAA_AREATEX_MAX_DISTANCE 16
|
||||
#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20
|
||||
#define SMAA_AREATEX_PIXEL_SIZE (1.0f / float2(160.0f, 560.0f))
|
||||
#define SMAA_AREATEX_SUBTEX_SIZE (1.0f / 7.0f)
|
||||
#define SMAA_SEARCHTEX_SIZE float2(66.0f, 33.0f)
|
||||
#define SMAA_SEARCHTEX_PACKED_SIZE float2(64.0f, 16.0f)
|
||||
#define SMAA_CORNER_ROUNDING_NORM (float(SMAA_CORNER_ROUNDING) / 100.0f)
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Porting Functions */
|
||||
|
||||
#if defined(SMAA_HLSL_3)
|
||||
# define SMAATexture2D(tex) sampler2D tex
|
||||
# define SMAATexturePass2D(tex) tex
|
||||
# define SMAASampleLevelZero(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
|
||||
# define SMAASampleLevelZeroPoint(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
|
||||
/* clang-format off */
|
||||
# define SMAASampleLevelZeroOffset(tex, coord, offset) tex2Dlod(tex, float4(coord + offset * SMAA_RT_METRICS.xy, 0.0, 0.0))
|
||||
/* clang-format on */
|
||||
# define SMAASample(tex, coord) tex2D(tex, coord)
|
||||
# define SMAASamplePoint(tex, coord) tex2D(tex, coord)
|
||||
# define SMAASampleOffset(tex, coord, offset) tex2D(tex, coord + offset * SMAA_RT_METRICS.xy)
|
||||
# define SMAA_FLATTEN [flatten]
|
||||
# define SMAA_BRANCH [branch]
|
||||
#endif
|
||||
#if defined(SMAA_HLSL_4) || defined(SMAA_HLSL_4_1)
|
||||
SamplerState LinearSampler
|
||||
{
|
||||
Filter = MIN_MAG_LINEAR_MIP_POINT;
|
||||
AddressU = Clamp;
|
||||
AddressV = Clamp;
|
||||
};
|
||||
SamplerState PointSampler
|
||||
{
|
||||
Filter = MIN_MAG_MIP_POINT;
|
||||
AddressU = Clamp;
|
||||
AddressV = Clamp;
|
||||
};
|
||||
# define SMAATexture2D(tex) Texture2D tex
|
||||
# define SMAATexturePass2D(tex) tex
|
||||
# define SMAASampleLevelZero(tex, coord) tex.SampleLevel(LinearSampler, coord, 0)
|
||||
# define SMAASampleLevelZeroPoint(tex, coord) tex.SampleLevel(PointSampler, coord, 0)
|
||||
/* clang-format off */
|
||||
# define SMAASampleLevelZeroOffset(tex, coord, offset) tex.SampleLevel(LinearSampler, coord, 0, offset)
|
||||
/* clang-format on */
|
||||
# define SMAASample(tex, coord) tex.Sample(LinearSampler, coord)
|
||||
# define SMAASamplePoint(tex, coord) tex.Sample(PointSampler, coord)
|
||||
# define SMAASampleOffset(tex, coord, offset) tex.Sample(LinearSampler, coord, offset)
|
||||
# define SMAA_FLATTEN [flatten]
|
||||
# define SMAA_BRANCH [branch]
|
||||
# define SMAATexture2DMS2(tex) Texture2DMS<float4, 2> tex
|
||||
# define SMAALoad(tex, pos, sample) tex.Load(pos, sample)
|
||||
# if defined(SMAA_HLSL_4_1)
|
||||
# define SMAAGather(tex, coord) tex.Gather(LinearSampler, coord, 0)
|
||||
# endif
|
||||
#endif
|
||||
#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4) || defined(GPU_METAL) || defined(GPU_VULKAN)
|
||||
# define SMAATexture2D(tex) sampler2D tex
|
||||
# define SMAATexturePass2D(tex) tex
|
||||
# define SMAASampleLevelZero(tex, coord) textureLod(tex, coord, 0.0)
|
||||
# define SMAASampleLevelZeroPoint(tex, coord) textureLod(tex, coord, 0.0)
|
||||
# define SMAASampleLevelZeroOffset(tex, coord, offset) textureLodOffset(tex, coord, 0.0, offset)
|
||||
# define SMAASample(tex, coord) texture(tex, coord)
|
||||
# define SMAASamplePoint(tex, coord) texture(tex, coord)
|
||||
# define SMAASampleOffset(tex, coord, offset) texture(tex, coord, offset)
|
||||
# define SMAA_FLATTEN
|
||||
# define SMAA_BRANCH
|
||||
# define lerp(a, b, t) mix(a, b, t)
|
||||
# define saturate(a) clamp(a, 0.0, 1.0)
|
||||
# if defined(SMAA_GLSL_4)
|
||||
# define SMAAGather(tex, coord) textureGather(tex, coord)
|
||||
# endif
|
||||
# if defined(SMAA_GLSL_4)
|
||||
# define mad(a, b, c) fma(a, b, c)
|
||||
# elif defined(GPU_VULKAN)
|
||||
/* NOTE(Vulkan) mad macro doesn't work, define each override as work-around. */
|
||||
vec4 mad(vec4 a, vec4 b, vec4 c)
|
||||
{
|
||||
return fma(a, b, c);
|
||||
}
|
||||
vec3 mad(vec3 a, vec3 b, vec3 c)
|
||||
{
|
||||
return fma(a, b, c);
|
||||
}
|
||||
vec2 mad(vec2 a, vec2 b, vec2 c)
|
||||
{
|
||||
return fma(a, b, c);
|
||||
}
|
||||
float mad(float a, float b, float c)
|
||||
{
|
||||
return fma(a, b, c);
|
||||
}
|
||||
# else
|
||||
# define mad(a, b, c) (a * b + c)
|
||||
# endif
|
||||
/* NOTE(Metal): Types already natively declared in MSL. */
|
||||
# ifndef GPU_METAL
|
||||
# define float2 vec2
|
||||
# define float3 vec3
|
||||
# define float4 vec4
|
||||
# define int2 ivec2
|
||||
# define int3 ivec3
|
||||
# define int4 ivec4
|
||||
# define bool2 bvec2
|
||||
# define bool3 bvec3
|
||||
# define bool4 bvec4
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* clang-format off */
|
||||
#if !defined(SMAA_HLSL_3) && !defined(SMAA_HLSL_4) && !defined(SMAA_HLSL_4_1) && !defined(SMAA_GLSL_3) && !defined(SMAA_GLSL_4) && !defined(SMAA_CUSTOM_SL)
|
||||
# error you must define the shading language: SMAA_HLSL_*, SMAA_GLSL_* or SMAA_CUSTOM_SL
|
||||
#endif
|
||||
/* clang-format on */
|
||||
|
||||
namespace blender::compositor {
|
||||
|
||||
/*
|
||||
* An implementation of Enhanced Sub-pixel Morphological Anti-aliasing (SMAA)
|
||||
*
|
||||
* The algorithm was proposed by:
|
||||
* Jorge Jimenez, Jose I. Echevarria, Tiago Sousa, Diego Gutierrez
|
||||
*
|
||||
* http://www.iryoku.com/smaa/
|
||||
*
|
||||
* This file is based on SMAA-CPP:
|
||||
*
|
||||
* https://github.com/i_ri-E/smaa-cpp
|
||||
*
|
||||
* Currently only SMAA 1x mode is provided, so the operation will be done
|
||||
* with no spatial multi-sampling nor temporal super-sampling.
|
||||
*
|
||||
* NOTE: This program assumes the screen coordinates are DirectX style, so
|
||||
* the vertical direction is upside-down. "top" and "bottom" actually mean
|
||||
* bottom and top, respectively.
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Misc functions */
|
||||
|
||||
/**
|
||||
* Conditional move:
|
||||
*/
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Non-Configurable Defines */
|
||||
|
||||
#define SMAA_AREATEX_SIZE 80
|
||||
#define SMAA_AREATEX_MAX_DISTANCE 20
|
||||
#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20
|
||||
#define SMAA_MAX_SEARCH_STEPS 362 /* 362 - 1 = 19^2 */
|
||||
#define SMAA_MAX_SEARCH_STEPS_DIAG 19
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Internal Functions to Sample Pixel Color from Image */
|
||||
|
||||
static inline void sample(MemoryBuffer *reader, int x, int y, float color[4])
|
||||
static void SMAAMovc(float2 cond, float2 &variable, float2 value)
|
||||
{
|
||||
reader->read_elem_checked(x, y, color);
|
||||
/* Use select function (select(genType A, genType B, genBType cond)). */
|
||||
variable = math::interpolate(variable, value, cond);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void sample_bilinear_vertical(T *reader, int x, int y, float yoffset, float color[4])
|
||||
static void SMAAMovc(float4 cond, float4 &variable, float4 value)
|
||||
{
|
||||
float iy = floorf(yoffset);
|
||||
float fy = yoffset - iy;
|
||||
y += int(iy);
|
||||
|
||||
float color00[4], color01[4];
|
||||
|
||||
sample(reader, x + 0, y + 0, color00);
|
||||
sample(reader, x + 0, y + 1, color01);
|
||||
|
||||
color[0] = interpf(color01[0], color00[0], fy);
|
||||
color[1] = interpf(color01[1], color00[1], fy);
|
||||
color[2] = interpf(color01[2], color00[2], fy);
|
||||
color[3] = interpf(color01[3], color00[3], fy);
|
||||
/* Use select function (select(genType A, genType B, genBType cond)). */
|
||||
variable = math::interpolate(variable, value, cond);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void sample_bilinear_horizontal(T *reader, int x, int y, float xoffset, float color[4])
|
||||
#if SMAA_INCLUDE_VS
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Vertex Shaders */
|
||||
|
||||
/**
|
||||
* Edge Detection Vertex Shader
|
||||
*/
|
||||
static void SMAAEdgeDetectionVS(float2 texcoord, int2 size, float4 offset[3])
|
||||
{
|
||||
float ix = floorf(xoffset);
|
||||
float fx = xoffset - ix;
|
||||
x += int(ix);
|
||||
|
||||
float color00[4], color10[4];
|
||||
|
||||
sample(reader, x + 0, y + 0, color00);
|
||||
sample(reader, x + 1, y + 0, color10);
|
||||
|
||||
color[0] = interpf(color10[0], color00[0], fx);
|
||||
color[1] = interpf(color10[1], color00[1], fx);
|
||||
color[2] = interpf(color10[2], color00[2], fx);
|
||||
color[3] = interpf(color10[3], color00[3], fx);
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Internal Functions to Sample Blending Weights from AreaTex */
|
||||
|
||||
static inline const float *areatex_sample_internal(const float *areatex, int x, int y)
|
||||
{
|
||||
return &areatex[(std::clamp(x, 0, SMAA_AREATEX_SIZE - 1) +
|
||||
std::clamp(y, 0, SMAA_AREATEX_SIZE - 1) * SMAA_AREATEX_SIZE) *
|
||||
2];
|
||||
offset[0] = float4(texcoord.xy(), texcoord.xy()) +
|
||||
float4(-1.0f, 0.0f, 0.0f, -1.0f) / float4(size, size);
|
||||
offset[1] = float4(texcoord.xy(), texcoord.xy()) +
|
||||
float4(1.0f, 0.0f, 0.0f, 1.0f) / float4(size, size);
|
||||
offset[2] = float4(texcoord.xy(), texcoord.xy()) +
|
||||
float4(-2.0f, 0.0f, 0.0f, -2.0f) / float4(size, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* We have the distance and both crossing edges. So, what are the areas
|
||||
* at each side of current edge?
|
||||
* Blend Weight Calculation Vertex Shader
|
||||
*/
|
||||
static void area(int d1, int d2, int e1, int e2, float weights[2])
|
||||
static void SMAABlendingWeightCalculationVS(float2 texcoord,
|
||||
int2 size,
|
||||
float2 &pixcoord,
|
||||
float4 offset[3])
|
||||
{
|
||||
/* The areas texture is compressed quadratically: */
|
||||
float x = float(SMAA_AREATEX_MAX_DISTANCE * e1) + sqrtf(float(d1));
|
||||
float y = float(SMAA_AREATEX_MAX_DISTANCE * e2) + sqrtf(float(d2));
|
||||
pixcoord = texcoord * float2(size);
|
||||
|
||||
float ix = floorf(x), iy = floorf(y);
|
||||
float fx = x - ix, fy = y - iy;
|
||||
int X = int(ix), Y = int(iy);
|
||||
// We will use these offsets for the searches later on (see @PSEUDO_GATHER4):
|
||||
offset[0] = float4(texcoord.xy(), texcoord.xy()) +
|
||||
float4(-0.25f, -0.125f, 1.25f, -0.125f) / float4(size, size);
|
||||
offset[1] = float4(texcoord.xy(), texcoord.xy()) +
|
||||
float4(-0.125f, -0.25f, -0.125f, 1.25f) / float4(size, size);
|
||||
|
||||
const float *weights00 = areatex_sample_internal(areatex, X + 0, Y + 0);
|
||||
const float *weights10 = areatex_sample_internal(areatex, X + 1, Y + 0);
|
||||
const float *weights01 = areatex_sample_internal(areatex, X + 0, Y + 1);
|
||||
const float *weights11 = areatex_sample_internal(areatex, X + 1, Y + 1);
|
||||
|
||||
weights[0] = interpf(
|
||||
interpf(weights11[0], weights01[0], fx), interpf(weights10[0], weights00[0], fx), fy);
|
||||
weights[1] = interpf(
|
||||
interpf(weights11[1], weights01[1], fx), interpf(weights10[1], weights00[1], fx), fy);
|
||||
// And these for the searches, they indicate the ends of the loops:
|
||||
offset[2] = float4(offset[0].x, offset[0].z, offset[1].y, offset[1].w) +
|
||||
(float4(-2.0f, 2.0f, -2.0f, 2.0f) * float(SMAA_MAX_SEARCH_STEPS)) /
|
||||
float4(float2(size.x), float2(size.y));
|
||||
}
|
||||
|
||||
/**
|
||||
* Similar to area(), this calculates the area corresponding to a certain
|
||||
* Neighborhood Blending Vertex Shader
|
||||
*/
|
||||
static void SMAANeighborhoodBlendingVS(float2 texcoord, int2 size, float4 &offset)
|
||||
{
|
||||
offset = float4(texcoord, texcoord) + float4(1.0f, 0.0f, 0.0f, 1.0f) / float4(size, size);
|
||||
}
|
||||
#endif // SMAA_INCLUDE_VS
|
||||
|
||||
/**
|
||||
* Luma Edge Detection
|
||||
*
|
||||
* IMPORTANT NOTICE: luma edge detection requires gamma-corrected colors, and
|
||||
* thus 'colorTex' should be a non-sRGB texture.
|
||||
*/
|
||||
static float2 SMAALumaEdgeDetectionPS(float2 texcoord,
|
||||
float4 offset[3],
|
||||
SMAATexture2D(colorTex),
|
||||
#if SMAA_PREDICATION
|
||||
SMAATexture2D(predicationTex),
|
||||
#endif
|
||||
float edge_threshold,
|
||||
float3 luminance_coefficients,
|
||||
float local_contrast_adaptation_factor)
|
||||
{
|
||||
#if SMAA_PREDICATION
|
||||
float2 threshold = SMAACalculatePredicatedThreshold(
|
||||
texcoord, offset, SMAATexturePass2D(predicationTex));
|
||||
#else
|
||||
// Calculate the threshold:
|
||||
float2 threshold = float2(edge_threshold, edge_threshold);
|
||||
#endif
|
||||
|
||||
// Calculate lumas:
|
||||
// float4 weights = float4(0.2126, 0.7152, 0.0722, 0.0);
|
||||
float4 weights = float4(luminance_coefficients, 0.0f);
|
||||
float L = math::dot(SMAASamplePoint(colorTex, texcoord), weights);
|
||||
|
||||
float Lleft = math::dot(SMAASamplePoint(colorTex, offset[0].xy()), weights);
|
||||
float Ltop = math::dot(SMAASamplePoint(colorTex, offset[0].zw()), weights);
|
||||
|
||||
// We do the usual threshold:
|
||||
float4 delta;
|
||||
float2 delta_left_top = math::abs(L - float2(Lleft, Ltop));
|
||||
delta.x = delta_left_top.x;
|
||||
delta.y = delta_left_top.y;
|
||||
float2 edges = math::step(threshold, delta.xy());
|
||||
|
||||
// Then return early if there is no edge:
|
||||
if (math::dot(edges, float2(1.0f, 1.0f)) == 0.0f) {
|
||||
return float2(0.0f);
|
||||
}
|
||||
|
||||
// Calculate right and bottom deltas:
|
||||
float Lright = math::dot(SMAASamplePoint(colorTex, offset[1].xy()), weights);
|
||||
float Lbottom = math::dot(SMAASamplePoint(colorTex, offset[1].zw()), weights);
|
||||
float2 delta_right_bottom = math::abs(L - float2(Lright, Lbottom));
|
||||
delta.z = delta_right_bottom.x;
|
||||
delta.w = delta_right_bottom.y;
|
||||
|
||||
// Calculate the maximum delta in the direct neighborhood:
|
||||
float2 maxDelta = math::max(delta.xy(), delta.zw());
|
||||
|
||||
// Calculate left-left and top-top deltas:
|
||||
float Lleftleft = math::dot(SMAASamplePoint(colorTex, offset[2].xy()), weights);
|
||||
float Ltoptop = math::dot(SMAASamplePoint(colorTex, offset[2].zw()), weights);
|
||||
float2 delta_left_left_top_top = math::abs(float2(Lleft, Ltop) - float2(Lleftleft, Ltoptop));
|
||||
delta.z = delta_left_left_top_top.x;
|
||||
delta.w = delta_left_left_top_top.y;
|
||||
|
||||
// Calculate the final maximum delta:
|
||||
maxDelta = math::max(maxDelta.xy(), delta.zw());
|
||||
float finalDelta = math::max(maxDelta.x, maxDelta.y);
|
||||
|
||||
// Local contrast adaptation:
|
||||
edges *= math::step(finalDelta, local_contrast_adaptation_factor * delta.xy());
|
||||
|
||||
return edges;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Diagonal Search Functions */
|
||||
|
||||
#if !defined(SMAA_DISABLE_DIAG_DETECTION)
|
||||
|
||||
/**
|
||||
* Allows to decode two binary values from a bilinear-filtered access.
|
||||
*/
|
||||
static float2 SMAADecodeDiagBilinearAccess(float2 e)
|
||||
{
|
||||
// Bilinear access for fetching 'e' have a 0.25 offset, and we are
|
||||
// interested in the R and G edges:
|
||||
//
|
||||
// +---G---+-------+
|
||||
// | x o R x |
|
||||
// +-------+-------+
|
||||
//
|
||||
// Then, if one of these edge is enabled:
|
||||
// Red: (0.75 * X + 0.25 * 1) => 0.25 or 1.0
|
||||
// Green: (0.75 * 1 + 0.25 * X) => 0.75 or 1.0
|
||||
//
|
||||
// This function will unpack the values (mad + mul + round):
|
||||
// wolframalpha.com: round(x * abs(5 * x - 5 * 0.75)) plot 0 to 1
|
||||
e.x = e.x * math::abs(5.0f * e.x - 5.0f * 0.75f);
|
||||
return math::round(e);
|
||||
}
|
||||
|
||||
static float4 SMAADecodeDiagBilinearAccess(float4 e)
|
||||
{
|
||||
e.x = e.x * math::abs(5.0f * e.x - 5.0f * 0.75f);
|
||||
e.z = e.z * math::abs(5.0f * e.z - 5.0f * 0.75f);
|
||||
return math::round(e);
|
||||
}
|
||||
|
||||
/**
|
||||
* These functions allows to perform diagonal pattern searches.
|
||||
*/
|
||||
static float2 SMAASearchDiag1(
|
||||
SMAATexture2D(edgesTex), float2 texcoord, float2 dir, int2 size, float2 &e)
|
||||
{
|
||||
float4 coord = float4(texcoord, -1.0f, 1.0f);
|
||||
float3 t = float3(1.0f / float2(size), 1.0f);
|
||||
while (coord.z < float(SMAA_MAX_SEARCH_STEPS_DIAG - 1) && coord.w > 0.9f) {
|
||||
float3 increment = mad(t, float3(dir, 1.0f), coord.xyz());
|
||||
coord.x = increment.x;
|
||||
coord.y = increment.y;
|
||||
coord.z = increment.z;
|
||||
e = SMAASamplePoint(edgesTex, coord.xy()).xy();
|
||||
coord.w = math::dot(e, float2(0.5f, 0.5f));
|
||||
}
|
||||
return coord.zw();
|
||||
}
|
||||
|
||||
static float2 SMAASearchDiag2(
|
||||
SMAATexture2D(edgesTex), float2 texcoord, float2 dir, int2 size, float2 &e)
|
||||
{
|
||||
float4 coord = float4(texcoord, -1.0f, 1.0f);
|
||||
coord.x += 0.25f / size.x; // See @SearchDiag2Optimization
|
||||
float3 t = float3(1.0f / float2(size), 1.0f);
|
||||
while (coord.z < float(SMAA_MAX_SEARCH_STEPS_DIAG - 1) && coord.w > 0.9f) {
|
||||
float3 increment = mad(t, float3(dir, 1.0f), coord.xyz());
|
||||
coord.x = increment.x;
|
||||
coord.y = increment.y;
|
||||
coord.z = increment.z;
|
||||
|
||||
// @SearchDiag2Optimization
|
||||
// Fetch both edges at once using bilinear filtering:
|
||||
e = SMAASampleLevelZero(edgesTex, coord.xy()).xy();
|
||||
e = SMAADecodeDiagBilinearAccess(e);
|
||||
|
||||
// Non-optimized version:
|
||||
// e.g = SMAASampleLevelZero(edgesTex, coord.xy).g;
|
||||
// e.r = SMAASampleLevelZeroOffset(edgesTex, coord.xy, int2(1, 0), size).r;
|
||||
|
||||
coord.w = math::dot(e, float2(0.5f, 0.5f));
|
||||
}
|
||||
return coord.zw();
|
||||
}
|
||||
|
||||
/**
|
||||
* Similar to SMAAArea, this calculates the area corresponding to a certain
|
||||
* diagonal distance and crossing edges 'e'.
|
||||
*/
|
||||
static void area_diag(int d1, int d2, int e1, int e2, float weights[2])
|
||||
static float2 SMAAAreaDiag(SMAATexture2D(areaTex), float2 dist, float2 e, float offset)
|
||||
{
|
||||
int x = SMAA_AREATEX_MAX_DISTANCE_DIAG * e1 + d1;
|
||||
int y = SMAA_AREATEX_MAX_DISTANCE_DIAG * e2 + d2;
|
||||
float2 texcoord = mad(
|
||||
float2(SMAA_AREATEX_MAX_DISTANCE_DIAG, SMAA_AREATEX_MAX_DISTANCE_DIAG), e, dist);
|
||||
|
||||
const float *w = areatex_sample_internal(areatex_diag, x, y);
|
||||
copy_v2_v2(weights, w);
|
||||
// We do a scale and bias for mapping to texel space:
|
||||
texcoord = mad(SMAA_AREATEX_PIXEL_SIZE, texcoord, 0.5f * SMAA_AREATEX_PIXEL_SIZE);
|
||||
|
||||
// Diagonal areas are on the second half of the texture:
|
||||
texcoord.x += 0.5f;
|
||||
|
||||
// Move to proper place, according to the subpixel offset:
|
||||
texcoord.y += SMAA_AREATEX_SUBTEX_SIZE * offset;
|
||||
|
||||
// Do it!
|
||||
return SMAA_AREATEX_SELECT(SMAASampleLevelZero(areaTex, texcoord));
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Edge Detection (First Pass) */
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
|
||||
SMAAEdgeDetectionOperation::SMAAEdgeDetectionOperation()
|
||||
/**
|
||||
* This searches for diagonal patterns and returns the corresponding weights.
|
||||
*/
|
||||
static float2 SMAACalculateDiagWeights(SMAATexture2D(edgesTex),
|
||||
SMAATexture2D(areaTex),
|
||||
float2 texcoord,
|
||||
float2 e,
|
||||
float4 subsampleIndices,
|
||||
int2 size)
|
||||
{
|
||||
this->add_input_socket(DataType::Color); /* image */
|
||||
this->add_input_socket(DataType::Value); /* Depth, material ID, etc. TODO: currently unused. */
|
||||
this->add_output_socket(DataType::Color);
|
||||
flags_.can_be_constant = true;
|
||||
this->set_threshold(CMP_DEFAULT_SMAA_THRESHOLD);
|
||||
this->set_local_contrast_adaptation_factor(CMP_DEFAULT_SMAA_CONTRAST_LIMIT);
|
||||
float2 weights = float2(0.0f, 0.0f);
|
||||
|
||||
// Search for the line ends:
|
||||
float4 d;
|
||||
float2 end;
|
||||
if (e.x > 0.0f) {
|
||||
float2 negative_diagonal = SMAASearchDiag1(
|
||||
SMAATexturePass2D(edgesTex), texcoord, float2(-1.0f, 1.0f), size, end);
|
||||
d.x = negative_diagonal.x;
|
||||
d.z = negative_diagonal.y;
|
||||
d.x += float(end.y > 0.9f);
|
||||
}
|
||||
else {
|
||||
d.x = 0.0f;
|
||||
d.z = 0.0f;
|
||||
}
|
||||
float2 positive_diagonal = SMAASearchDiag1(
|
||||
SMAATexturePass2D(edgesTex), texcoord, float2(1.0, -1.0), size, end);
|
||||
d.y = positive_diagonal.x;
|
||||
d.w = positive_diagonal.y;
|
||||
|
||||
SMAA_BRANCH
|
||||
if (d.x + d.y > 2.0f) { // d.x + d.y + 1 > 3
|
||||
// Fetch the crossing edges:
|
||||
float4 coords = float4(texcoord, texcoord) +
|
||||
float4(-d.x + 0.25f, d.x, d.y, -d.y - 0.25f) / float4(size, size);
|
||||
float4 c;
|
||||
float2 left_edge = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(-1, 0), size).xy();
|
||||
float2 right_edge = SMAASampleLevelZeroOffset(edgesTex, coords.zw(), int2(1, 0), size).xy();
|
||||
c.x = left_edge.x;
|
||||
c.y = left_edge.y;
|
||||
c.z = right_edge.x;
|
||||
c.w = right_edge.y;
|
||||
float4 decoded_access = SMAADecodeDiagBilinearAccess(c);
|
||||
c.y = decoded_access.x;
|
||||
c.x = decoded_access.y;
|
||||
c.w = decoded_access.z;
|
||||
c.z = decoded_access.w;
|
||||
|
||||
// Non-optimized version:
|
||||
// float4 coords = mad(float4(-d.x, d.x, d.y, -d.y), SMAA_RT_METRICS.xyxy, texcoord.xyxy);
|
||||
// float4 c;
|
||||
// c.x = SMAASampleLevelZeroOffset(edgesTex, coords.xy, int2(-1, 0), size).g;
|
||||
// c.y = SMAASampleLevelZeroOffset(edgesTex, coords.xy, int2( 0, 0), size).r;
|
||||
// c.z = SMAASampleLevelZeroOffset(edgesTex, coords.zw, int2( 1, 0), size).g;
|
||||
// c.w = SMAASampleLevelZeroOffset(edgesTex, coords.zw, int2( 1, -1), size).r;
|
||||
|
||||
// Merge crossing edges at each side into a single value:
|
||||
float2 cc = mad(float2(2.0f, 2.0f), float2(c.x, c.z), float2(c.y, c.w));
|
||||
|
||||
// Remove the crossing edge if we didn't found the end of the line:
|
||||
SMAAMovc(math::step(0.9f, d.zw()), cc, float2(0.0f, 0.0f));
|
||||
|
||||
// Fetch the areas for this line:
|
||||
weights += SMAAAreaDiag(SMAATexturePass2D(areaTex), d.xy(), cc, subsampleIndices.z);
|
||||
}
|
||||
|
||||
// Search for the line ends:
|
||||
float2 negative_diagonal = SMAASearchDiag2(
|
||||
SMAATexturePass2D(edgesTex), texcoord, float2(-1.0f, -1.0f), size, end);
|
||||
d.x = negative_diagonal.x;
|
||||
d.z = negative_diagonal.y;
|
||||
if (SMAASamplePointOffset(edgesTex, texcoord, int2(1, 0), size).x > 0.0f) {
|
||||
float2 positive_diagonal = SMAASearchDiag2(
|
||||
SMAATexturePass2D(edgesTex), texcoord, float2(1.0f, 1.0f), size, end);
|
||||
d.y = positive_diagonal.x;
|
||||
d.w = positive_diagonal.y;
|
||||
d.y += float(end.y > 0.9f);
|
||||
}
|
||||
else {
|
||||
d.y = 0.0f;
|
||||
d.w = 0.0f;
|
||||
}
|
||||
|
||||
SMAA_BRANCH
|
||||
if (d.x + d.y > 2.0f) { // d.x + d.y + 1 > 3
|
||||
// Fetch the crossing edges:
|
||||
float4 coords = float4(texcoord, texcoord) + float4(-d.x, -d.x, d.y, d.y) / float4(size, size);
|
||||
float4 c;
|
||||
c.x = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(-1, 0), size).y;
|
||||
c.y = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(0, -1), size).x;
|
||||
float2 left_edge = SMAASampleLevelZeroOffset(edgesTex, coords.zw(), int2(1, 0), size).xy();
|
||||
c.z = left_edge.y;
|
||||
c.w = left_edge.x;
|
||||
float2 cc = mad(float2(2.0f, 2.0f), float2(c.x, c.z), float2(c.y, c.w));
|
||||
|
||||
// Remove the crossing edge if we didn't found the end of the line:
|
||||
SMAAMovc(math::step(0.9f, d.zw()), cc, float2(0.0f, 0.0f));
|
||||
|
||||
// Fetch the areas for this line:
|
||||
float2 area = SMAAAreaDiag(SMAATexturePass2D(areaTex), d.xy(), cc, subsampleIndices.w).xy();
|
||||
weights.x += area.y;
|
||||
weights.y += area.x;
|
||||
}
|
||||
|
||||
return weights;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Horizontal/Vertical Search Functions */
|
||||
|
||||
/**
|
||||
* This allows to determine how much length should we add in the last step
|
||||
* of the searches. It takes the bilinearly interpolated edge (see
|
||||
* @PSEUDO_GATHER4), and adds 0, 1 or 2, depending on which edges and
|
||||
* crossing edges are active.
|
||||
*/
|
||||
static float SMAASearchLength(SMAATexture2D(searchTex), float2 e, float offset)
|
||||
{
|
||||
// The texture is flipped vertically, with left and right cases taking half
|
||||
// of the space horizontally:
|
||||
float2 scale = SMAA_SEARCHTEX_SIZE * float2(0.5f, -1.0f);
|
||||
float2 bias = SMAA_SEARCHTEX_SIZE * float2(offset, 1.0f);
|
||||
|
||||
// Scale and bias to access texel centers:
|
||||
scale += float2(-1.0f, 1.0f);
|
||||
bias += float2(0.5f, -0.5f);
|
||||
|
||||
// Convert from pixel coordinates to texcoords:
|
||||
// (We use SMAA_SEARCHTEX_PACKED_SIZE because the texture is cropped)
|
||||
scale *= 1.0f / SMAA_SEARCHTEX_PACKED_SIZE;
|
||||
bias *= 1.0f / SMAA_SEARCHTEX_PACKED_SIZE;
|
||||
|
||||
// Lookup the search texture:
|
||||
return SMAA_SEARCHTEX_SELECT(SMAASampleLevelZero(searchTex, mad(scale, e, bias)));
|
||||
}
|
||||
|
||||
void SMAAEdgeDetectionOperation::set_threshold(float threshold)
|
||||
/**
|
||||
* Horizontal/vertical search functions for the 2nd pass.
|
||||
*/
|
||||
static float SMAASearchXLeft(
|
||||
SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
|
||||
{
|
||||
/* UI values are between 0 and 1 for simplicity but algorithm expects values between 0 and 0.5 */
|
||||
threshold_ = scalenorm(0, 0.5, threshold);
|
||||
/**
|
||||
* @PSEUDO_GATHER4
|
||||
* This texcoord has been offset by (-0.25, -0.125) in the vertex shader to
|
||||
* sample between edge, thus fetching four edges in a row.
|
||||
* Sampling with different offsets in each direction allows to disambiguate
|
||||
* which edges are active from the four fetched ones.
|
||||
*/
|
||||
float2 e = float2(0.0f, 1.0f);
|
||||
while (texcoord.x > end && e.y > 0.8281f && // Is there some edge not activated?
|
||||
e.x == 0.0f) // Or is there a crossing edge that breaks the line?
|
||||
{
|
||||
e = SMAASampleLevelZero(edgesTex, texcoord).xy();
|
||||
texcoord = texcoord - float2(2.0f, 0.0f) / float2(size);
|
||||
}
|
||||
|
||||
float offset = mad(
|
||||
-(255.0f / 127.0f), SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.0f), 3.25f);
|
||||
return texcoord.x + offset / size.x;
|
||||
|
||||
// Non-optimized version:
|
||||
// We correct the previous (-0.25, -0.125) offset we applied:
|
||||
// texcoord.x += 0.25 * SMAA_RT_METRICS.x;
|
||||
|
||||
// The searches are bias by 1, so adjust the coords accordingly:
|
||||
// texcoord.x += SMAA_RT_METRICS.x;
|
||||
|
||||
// Disambiguate the length added by the last step:
|
||||
// texcoord.x += 2.0 * SMAA_RT_METRICS.x; // Undo last step
|
||||
// texcoord.x -= SMAA_RT_METRICS.x * (255.0 / 127.0) *
|
||||
// SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.0); return mad(SMAA_RT_METRICS.x, offset,
|
||||
// texcoord.x);
|
||||
}
|
||||
|
||||
void SMAAEdgeDetectionOperation::set_local_contrast_adaptation_factor(float factor)
|
||||
static float SMAASearchXRight(
|
||||
SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
|
||||
{
|
||||
/* UI values are between 0 and 1 for simplicity but algorithm expects values between 1 and 10 */
|
||||
contrast_limit_ = scalenorm(1, 10, factor);
|
||||
float2 e = float2(0.0f, 1.0f);
|
||||
while (texcoord.x < end && e.y > 0.8281f && // Is there some edge not activated?
|
||||
e.x == 0.0f) // Or is there a crossing edge that breaks the line?
|
||||
{
|
||||
e = SMAASampleLevelZero(edgesTex, texcoord).xy();
|
||||
texcoord = texcoord + float2(2.0f, 0.0f) / float2(size);
|
||||
}
|
||||
float offset = mad(
|
||||
-(255.0f / 127.0f), SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.5f), 3.25f);
|
||||
return texcoord.x - offset / size.x;
|
||||
}
|
||||
|
||||
void SMAAEdgeDetectionOperation::get_area_of_interest(const int /*input_idx*/,
|
||||
const rcti &output_area,
|
||||
rcti &r_input_area)
|
||||
static float SMAASearchYUp(
|
||||
SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
|
||||
{
|
||||
r_input_area.xmax = output_area.xmax + 1;
|
||||
r_input_area.xmin = output_area.xmin - 2;
|
||||
r_input_area.ymax = output_area.ymax + 1;
|
||||
r_input_area.ymin = output_area.ymin - 2;
|
||||
float2 e = float2(1.0f, 0.0f);
|
||||
while (texcoord.y > end && e.x > 0.8281f && // Is there some edge not activated?
|
||||
e.y == 0.0f) // Or is there a crossing edge that breaks the line?
|
||||
{
|
||||
e = SMAASampleLevelZero(edgesTex, texcoord).xy();
|
||||
texcoord = texcoord - float2(0.0f, 2.0f) / float2(size);
|
||||
}
|
||||
float2 flipped_edge = float2(e.y, e.x);
|
||||
float offset = mad(-(255.0f / 127.0f),
|
||||
SMAASearchLength(SMAATexturePass2D(searchTex), flipped_edge, 0.0f),
|
||||
3.25f);
|
||||
return texcoord.y + offset / size.y;
|
||||
}
|
||||
|
||||
void SMAAEdgeDetectionOperation::update_memory_buffer_partial(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs)
|
||||
static float SMAASearchYDown(
|
||||
SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
|
||||
{
|
||||
const MemoryBuffer *image = inputs[0];
|
||||
for (BuffersIterator<float> it = output->iterate_with({}, area); !it.is_end(); ++it) {
|
||||
float color[4];
|
||||
const int x = it.x;
|
||||
const int y = it.y;
|
||||
float2 e = float2(1.0f, 0.0f);
|
||||
while (texcoord.y < end && e.x > 0.8281f && // Is there some edge not activated?
|
||||
e.y == 0.0f) // Or is there a crossing edge that breaks the line?
|
||||
{
|
||||
e = SMAASampleLevelZero(edgesTex, texcoord).xy();
|
||||
texcoord = texcoord + float2(0.0f, 2.0f) / float2(size);
|
||||
}
|
||||
float2 flipped_edge = float2(e.y, e.x);
|
||||
float offset = mad(-(255.0f / 127.0f),
|
||||
SMAASearchLength(SMAATexturePass2D(searchTex), flipped_edge, 0.5f),
|
||||
3.25f);
|
||||
return texcoord.y - offset / size.y;
|
||||
}
|
||||
|
||||
/* Calculate luma deltas: */
|
||||
image->read_elem_checked(x, y, color);
|
||||
const float L = IMB_colormanagement_get_luminance(color);
|
||||
image->read_elem_checked(x - 1, y, color);
|
||||
const float Lleft = IMB_colormanagement_get_luminance(color);
|
||||
image->read_elem_checked(x, y - 1, color);
|
||||
const float Ltop = IMB_colormanagement_get_luminance(color);
|
||||
const float Dleft = fabsf(L - Lleft);
|
||||
const float Dtop = fabsf(L - Ltop);
|
||||
/**
|
||||
* Ok, we have the distance and both crossing edges. So, what are the areas
|
||||
* at each side of current edge?
|
||||
*/
|
||||
static float2 SMAAArea(SMAATexture2D(areaTex), float2 dist, float e1, float e2, float offset)
|
||||
{
|
||||
// Rounding prevents precision errors of bilinear filtering:
|
||||
float2 texcoord = mad(float2(SMAA_AREATEX_MAX_DISTANCE, SMAA_AREATEX_MAX_DISTANCE),
|
||||
math::round(4.0f * float2(e1, e2)),
|
||||
dist);
|
||||
|
||||
/* We do the usual threshold: */
|
||||
it.out[0] = (x > 0 && Dleft >= threshold_) ? 1.0f : 0.0f;
|
||||
it.out[1] = (y > 0 && Dtop >= threshold_) ? 1.0f : 0.0f;
|
||||
it.out[2] = 0.0f;
|
||||
it.out[3] = 1.0f;
|
||||
// We do a scale and bias for mapping to texel space:
|
||||
texcoord = mad(SMAA_AREATEX_PIXEL_SIZE, texcoord, 0.5f * SMAA_AREATEX_PIXEL_SIZE);
|
||||
|
||||
/* Then discard if there is no edge: */
|
||||
if (is_zero_v2(it.out)) {
|
||||
continue;
|
||||
// Move to proper place, according to the subpixel offset:
|
||||
texcoord.y = mad(SMAA_AREATEX_SUBTEX_SIZE, offset, texcoord.y);
|
||||
|
||||
// Do it!
|
||||
return SMAA_AREATEX_SELECT(SMAASampleLevelZero(areaTex, texcoord));
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Corner Detection Functions */
|
||||
|
||||
static void SMAADetectHorizontalCornerPattern(SMAATexture2D(edgesTex),
|
||||
float2 &weights,
|
||||
float4 texcoord,
|
||||
float2 d,
|
||||
int2 size,
|
||||
int corner_rounding)
|
||||
{
|
||||
#if !defined(SMAA_DISABLE_CORNER_DETECTION)
|
||||
float2 leftRight = math::step(d, float2(d.y, d.x));
|
||||
float2 rounding = (1.0f - corner_rounding / 100.0f) * leftRight;
|
||||
|
||||
rounding /= leftRight.x + leftRight.y; // Reduce blending for pixels in the center of a line.
|
||||
|
||||
float2 factor = float2(1.0f, 1.0f);
|
||||
factor.x -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(0, 1), size).x;
|
||||
factor.x -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, 1), size).x;
|
||||
factor.y -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(0, -2), size).x;
|
||||
factor.y -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, -2), size).x;
|
||||
|
||||
weights *= saturate(factor);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void SMAADetectVerticalCornerPattern(SMAATexture2D(edgesTex),
|
||||
float2 &weights,
|
||||
float4 texcoord,
|
||||
float2 d,
|
||||
int2 size,
|
||||
int corner_rounding)
|
||||
{
|
||||
#if !defined(SMAA_DISABLE_CORNER_DETECTION)
|
||||
float2 leftRight = math::step(d, float2(d.y, d.x));
|
||||
float2 rounding = (1.0f - corner_rounding / 100.0f) * leftRight;
|
||||
|
||||
rounding /= leftRight.x + leftRight.y;
|
||||
|
||||
float2 factor = float2(1.0f, 1.0f);
|
||||
factor.x -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(1, 0), size).y;
|
||||
factor.x -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, 1), size).y;
|
||||
factor.y -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(-2, 0), size).y;
|
||||
factor.y -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(-2, 1), size).y;
|
||||
|
||||
weights *= saturate(factor);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Blending Weight Calculation Pixel Shader (Second Pass) */
|
||||
|
||||
static float4 SMAABlendingWeightCalculationPS(float2 texcoord,
|
||||
float2 pixcoord,
|
||||
float4 offset[3],
|
||||
MemoryBuffer *edgesTex,
|
||||
MemoryBuffer *areaTex,
|
||||
MemoryBuffer *searchTex,
|
||||
float4 subsampleIndices,
|
||||
int2 size,
|
||||
int corner_rounding)
|
||||
{ // Just pass zero for SMAA 1x, see @SUBSAMPLE_INDICES.
|
||||
float4 weights = float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
|
||||
float2 e = SMAASamplePoint(edgesTex, texcoord).xy();
|
||||
|
||||
SMAA_BRANCH
|
||||
if (e.y > 0.0f) { // Edge at north
|
||||
#if !defined(SMAA_DISABLE_DIAG_DETECTION)
|
||||
// Diagonals have both north and west edges, so searching for them in
|
||||
// one of the boundaries is enough.
|
||||
float2 diagonal_weights = SMAACalculateDiagWeights(SMAATexturePass2D(edgesTex),
|
||||
SMAATexturePass2D(areaTex),
|
||||
texcoord,
|
||||
e,
|
||||
subsampleIndices,
|
||||
size);
|
||||
|
||||
weights.x = diagonal_weights.x;
|
||||
weights.y = diagonal_weights.y;
|
||||
|
||||
// We give priority to diagonals, so if we find a diagonal we skip
|
||||
// horizontal/vertical processing.
|
||||
SMAA_BRANCH
|
||||
if (weights.x == -weights.y) { // weights.x + weights.y == 0.0
|
||||
#endif
|
||||
|
||||
float2 d;
|
||||
|
||||
// Find the distance to the left:
|
||||
float3 coords;
|
||||
coords.x = SMAASearchXLeft(SMAATexturePass2D(edgesTex),
|
||||
SMAATexturePass2D(searchTex),
|
||||
offset[0].xy(),
|
||||
offset[2].x,
|
||||
size);
|
||||
coords.y =
|
||||
offset[1].y; // offset[1].y = texcoord.y - 0.25 * SMAA_RT_METRICS.y (@CROSSING_OFFSET)
|
||||
d.x = coords.x;
|
||||
|
||||
// Now fetch the left crossing edges, two at a time using bilinear
|
||||
// filtering. Sampling at -0.25 (see @CROSSING_OFFSET) enables to
|
||||
// discern what value each edge has:
|
||||
float e1 = SMAASampleLevelZero(edgesTex, coords.xy()).x;
|
||||
|
||||
// Find the distance to the right:
|
||||
coords.z = SMAASearchXRight(SMAATexturePass2D(edgesTex),
|
||||
SMAATexturePass2D(searchTex),
|
||||
offset[0].zw(),
|
||||
offset[2].y,
|
||||
size);
|
||||
d.y = coords.z;
|
||||
|
||||
// We want the distances to be in pixel units (doing this here allows
|
||||
// better interleaving of arithmetic and memory accesses):
|
||||
d = math::abs(math::round(mad(float2(size.x), d, -float2(pixcoord.x))));
|
||||
|
||||
// SMAAArea below needs a sqrt, as the areas texture is compressed
|
||||
// quadratically:
|
||||
float2 sqrt_d = math::sqrt(d);
|
||||
|
||||
// Fetch the right crossing edges:
|
||||
float e2 =
|
||||
SMAASampleLevelZeroOffset(edgesTex, float2(coords.z, coords.y), int2(1, 0), size).x;
|
||||
|
||||
// Ok, we know how this pattern looks like, now it is time for getting
|
||||
// the actual area:
|
||||
float2 area = SMAAArea(SMAATexturePass2D(areaTex), sqrt_d, e1, e2, subsampleIndices.y);
|
||||
weights.x = area.x;
|
||||
weights.y = area.y;
|
||||
|
||||
// Fix corners:
|
||||
coords.y = texcoord.y;
|
||||
|
||||
float2 corner_weight = weights.xy();
|
||||
SMAADetectHorizontalCornerPattern(SMAATexturePass2D(edgesTex),
|
||||
corner_weight,
|
||||
float4(coords.xy(), coords.z, coords.y),
|
||||
d,
|
||||
size,
|
||||
corner_rounding);
|
||||
weights.x = corner_weight.x;
|
||||
weights.y = corner_weight.y;
|
||||
|
||||
#if !defined(SMAA_DISABLE_DIAG_DETECTION)
|
||||
}
|
||||
else
|
||||
e.x = 0.0f; // Skip vertical processing.
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Calculate right and bottom deltas: */
|
||||
image->read_elem_checked(x + 1, y, color);
|
||||
const float Lright = IMB_colormanagement_get_luminance(color);
|
||||
image->read_elem_checked(x, y + 1, color);
|
||||
const float Lbottom = IMB_colormanagement_get_luminance(color);
|
||||
const float Dright = fabsf(L - Lright);
|
||||
const float Dbottom = fabsf(L - Lbottom);
|
||||
SMAA_BRANCH
|
||||
if (e.x > 0.0f) { // Edge at west
|
||||
float2 d;
|
||||
|
||||
/* Calculate the maximum delta in the direct neighborhood: */
|
||||
float max_delta = fmaxf(fmaxf(Dleft, Dright), fmaxf(Dtop, Dbottom));
|
||||
// Find the distance to the top:
|
||||
float3 coords;
|
||||
coords.y = SMAASearchYUp(SMAATexturePass2D(edgesTex),
|
||||
SMAATexturePass2D(searchTex),
|
||||
offset[1].xy(),
|
||||
offset[2].z,
|
||||
size);
|
||||
coords.x = offset[0].x; // offset[1].x = texcoord.x - 0.25 * SMAA_RT_METRICS.x;
|
||||
d.x = coords.y;
|
||||
|
||||
/* Calculate luma used for both left and top edges: */
|
||||
image->read_elem_checked(x - 1, y - 1, color);
|
||||
const float Llefttop = IMB_colormanagement_get_luminance(color);
|
||||
// Fetch the top crossing edges:
|
||||
float e1 = SMAASampleLevelZero(edgesTex, coords.xy()).y;
|
||||
|
||||
/* Left edge */
|
||||
if (it.out[0] != 0.0f) {
|
||||
/* Calculate deltas around the left pixel: */
|
||||
image->read_elem_checked(x - 2, y, color);
|
||||
const float Lleftleft = IMB_colormanagement_get_luminance(color);
|
||||
image->read_elem_checked(x - 1, y + 1, color);
|
||||
const float Lleftbottom = IMB_colormanagement_get_luminance(color);
|
||||
const float Dleftleft = fabsf(Lleft - Lleftleft);
|
||||
const float Dlefttop = fabsf(Lleft - Llefttop);
|
||||
const float Dleftbottom = fabsf(Lleft - Lleftbottom);
|
||||
// Find the distance to the bottom:
|
||||
coords.z = SMAASearchYDown(SMAATexturePass2D(edgesTex),
|
||||
SMAATexturePass2D(searchTex),
|
||||
offset[1].zw(),
|
||||
offset[2].w,
|
||||
size);
|
||||
d.y = coords.z;
|
||||
|
||||
/* Calculate the final maximum delta: */
|
||||
max_delta = fmaxf(max_delta, fmaxf(Dleftleft, fmaxf(Dlefttop, Dleftbottom)));
|
||||
// We want the distances to be in pixel units:
|
||||
d = math::abs(math::round(mad(float2(size.y), d, -float2(pixcoord.y))));
|
||||
|
||||
/* Local contrast adaptation: */
|
||||
if (max_delta > contrast_limit_ * Dleft) {
|
||||
it.out[0] = 0.0f;
|
||||
}
|
||||
}
|
||||
// SMAAArea below needs a sqrt, as the areas texture is compressed
|
||||
// quadratically:
|
||||
float2 sqrt_d = math::sqrt(d);
|
||||
|
||||
/* Top edge */
|
||||
if (it.out[1] != 0.0f) {
|
||||
/* Calculate top-top delta: */
|
||||
image->read_elem_checked(x, y - 2, color);
|
||||
const float Ltoptop = IMB_colormanagement_get_luminance(color);
|
||||
image->read_elem_checked(x + 1, y - 1, color);
|
||||
const float Ltopright = IMB_colormanagement_get_luminance(color);
|
||||
const float Dtoptop = fabsf(Ltop - Ltoptop);
|
||||
const float Dtopleft = fabsf(Ltop - Llefttop);
|
||||
const float Dtopright = fabsf(Ltop - Ltopright);
|
||||
// Fetch the bottom crossing edges:
|
||||
float e2 = SMAASampleLevelZeroOffset(edgesTex, float2(coords.x, coords.z), int2(0, 1), size).y;
|
||||
|
||||
/* Calculate the final maximum delta: */
|
||||
max_delta = fmaxf(max_delta, fmaxf(Dtoptop, fmaxf(Dtopleft, Dtopright)));
|
||||
// Get the area for this direction:
|
||||
float2 area = SMAAArea(SMAATexturePass2D(areaTex), sqrt_d, e1, e2, subsampleIndices.x);
|
||||
weights.z = area.x;
|
||||
weights.w = area.y;
|
||||
|
||||
/* Local contrast adaptation: */
|
||||
if (max_delta > contrast_limit_ * Dtop) {
|
||||
it.out[1] = 0.0f;
|
||||
}
|
||||
}
|
||||
// Fix corners:
|
||||
coords.x = texcoord.x;
|
||||
|
||||
float2 corner_weight = weights.zw();
|
||||
SMAADetectVerticalCornerPattern(SMAATexturePass2D(edgesTex),
|
||||
corner_weight,
|
||||
float4(coords.xy(), coords.x, coords.z),
|
||||
d,
|
||||
size,
|
||||
corner_rounding);
|
||||
weights.z = corner_weight.x;
|
||||
weights.w = corner_weight.y;
|
||||
}
|
||||
|
||||
return weights;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* Neighborhood Blending Pixel Shader (Third Pass) */
|
||||
|
||||
static float4 SMAANeighborhoodBlendingPS(float2 texcoord,
|
||||
float4 offset,
|
||||
SMAATexture2D(colorTex),
|
||||
SMAATexture2D(blendTex),
|
||||
#if SMAA_REPROJECTION
|
||||
SMAATexture2D(velocityTex),
|
||||
#endif
|
||||
int2 size)
|
||||
{
|
||||
// Fetch the blending weights for current pixel:
|
||||
float4 a;
|
||||
a.x = SMAASample(blendTex, offset.xy()).w; // Right
|
||||
a.y = SMAASample(blendTex, offset.zw()).y; // Top
|
||||
a.z = SMAASample(blendTex, texcoord).z; // Left
|
||||
a.w = SMAASample(blendTex, texcoord).x; // Bottom
|
||||
|
||||
// Is there any blending weight with a value greater than 0.0?
|
||||
SMAA_BRANCH
|
||||
if (math::dot(a, float4(1.0f, 1.0f, 1.0f, 1.0f)) < 1e-5f) {
|
||||
float4 color = SMAASampleLevelZero(colorTex, texcoord);
|
||||
|
||||
#if SMAA_REPROJECTION
|
||||
float2 velocity = SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, texcoord));
|
||||
|
||||
// Pack velocity into the alpha channel:
|
||||
color.a = math::sqrt(5.0f * math::length(velocity));
|
||||
#endif
|
||||
|
||||
return color;
|
||||
}
|
||||
else {
|
||||
bool h = math::max(a.x, a.z) > math::max(a.y, a.w); // max(horizontal) > max(vertical)
|
||||
|
||||
// Calculate the blending offsets:
|
||||
float4 blendingOffset = float4(0.0f, a.y, 0.0f, a.w);
|
||||
float2 blendingWeight = float2(a.y, a.w);
|
||||
SMAAMovc(float4(h), blendingOffset, float4(a.x, 0.0f, a.z, 0.0f));
|
||||
SMAAMovc(float2(h), blendingWeight, float2(a.x, a.z));
|
||||
blendingWeight /= math::dot(blendingWeight, float2(1.0f, 1.0f));
|
||||
|
||||
// Calculate the texture coordinates:
|
||||
float4 blendingCoord = float4(texcoord, texcoord) + blendingOffset / float4(size, -size);
|
||||
|
||||
// We exploit bilinear filtering to mix current pixel with the chosen
|
||||
// neighbor:
|
||||
float4 color = blendingWeight.x * SMAASampleLevelZero(colorTex, blendingCoord.xy());
|
||||
color += blendingWeight.y * SMAASampleLevelZero(colorTex, blendingCoord.zw());
|
||||
|
||||
#if SMAA_REPROJECTION
|
||||
// Antialias velocity for proper reprojection in a later stage:
|
||||
float2 velocity = blendingWeight.x *
|
||||
SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, blendingCoord.xy()));
|
||||
velocity += blendingWeight.y *
|
||||
SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, blendingCoord.zw()));
|
||||
|
||||
// Pack velocity into the alpha channel:
|
||||
color.a = math::sqrt(5.0f * math::length(velocity));
|
||||
#endif
|
||||
|
||||
return color;
|
||||
}
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Blending Weight Calculation (Second Pass) */
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
|
||||
SMAABlendingWeightCalculationOperation::SMAABlendingWeightCalculationOperation()
|
||||
SMAAOperation::SMAAOperation()
|
||||
{
|
||||
this->add_input_socket(DataType::Color); /* edges */
|
||||
this->add_input_socket(DataType::Color);
|
||||
this->add_output_socket(DataType::Color);
|
||||
flags_.can_be_constant = true;
|
||||
this->set_corner_rounding(CMP_DEFAULT_SMAA_CORNER_ROUNDING);
|
||||
}
|
||||
|
||||
void SMAABlendingWeightCalculationOperation::set_corner_rounding(float rounding)
|
||||
{
|
||||
/* UI values are between 0 and 1 for simplicity but algorithm expects values between 0 and 100 */
|
||||
corner_rounding_ = int(scalenorm(0, 100, rounding));
|
||||
}
|
||||
|
||||
void SMAABlendingWeightCalculationOperation::update_memory_buffer_started(
|
||||
MemoryBuffer * /*output*/, const rcti & /*out_area*/, Span<MemoryBuffer *> inputs)
|
||||
{
|
||||
const MemoryBuffer *image = inputs[0];
|
||||
sample_image_fn_ = [=](int x, int y, float *out) { image->read_elem_checked(x, y, out); };
|
||||
}
|
||||
|
||||
void SMAABlendingWeightCalculationOperation::update_memory_buffer_partial(
|
||||
MemoryBuffer *output, const rcti &out_area, Span<MemoryBuffer *> /*inputs*/)
|
||||
{
|
||||
for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
|
||||
const int x = it.x;
|
||||
const int y = it.y;
|
||||
zero_v4(it.out);
|
||||
|
||||
float edges[4];
|
||||
sample_image_fn_(x, y, edges);
|
||||
|
||||
/* Edge at north */
|
||||
float c[4];
|
||||
if (edges[1] > 0.0f) {
|
||||
/* Diagonals have both north and west edges, so calculating weights for them */
|
||||
/* in one of the boundaries is enough. */
|
||||
calculate_diag_weights(x, y, edges, it.out);
|
||||
|
||||
/* We give priority to diagonals, so if we find a diagonal we skip. */
|
||||
/* horizontal/vertical processing. */
|
||||
if (!is_zero_v2(it.out)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Find the distance to the left and the right: */
|
||||
int left = search_xleft(x, y);
|
||||
int right = search_xright(x, y);
|
||||
int d1 = x - left, d2 = right - x;
|
||||
|
||||
/* Fetch the left and right crossing edges: */
|
||||
int e1 = 0, e2 = 0;
|
||||
sample_image_fn_(left, y - 1, c);
|
||||
if (c[0] > 0.0) {
|
||||
e1 += 1;
|
||||
}
|
||||
sample_image_fn_(left, y, c);
|
||||
if (c[0] > 0.0) {
|
||||
e1 += 2;
|
||||
}
|
||||
sample_image_fn_(right + 1, y - 1, c);
|
||||
if (c[0] > 0.0) {
|
||||
e2 += 1;
|
||||
}
|
||||
sample_image_fn_(right + 1, y, c);
|
||||
if (c[0] > 0.0) {
|
||||
e2 += 2;
|
||||
}
|
||||
|
||||
/* Ok, we know how this pattern looks like, now it is time for getting */
|
||||
/* the actual area: */
|
||||
area(d1, d2, e1, e2, it.out); /* R, G */
|
||||
|
||||
/* Fix corners: */
|
||||
if (corner_rounding_) {
|
||||
detect_horizontal_corner_pattern(it.out, left, right, y, d1, d2);
|
||||
}
|
||||
}
|
||||
|
||||
/* Edge at west */
|
||||
if (edges[0] > 0.0f) {
|
||||
/* Did we already do diagonal search for this west edge from the left neighboring pixel? */
|
||||
if (is_vertical_search_unneeded(x, y)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Find the distance to the top and the bottom: */
|
||||
int top = search_yup(x, y);
|
||||
int bottom = search_ydown(x, y);
|
||||
int d1 = y - top, d2 = bottom - y;
|
||||
|
||||
/* Fetch the top and bottom crossing edges: */
|
||||
int e1 = 0, e2 = 0;
|
||||
sample_image_fn_(x - 1, top, c);
|
||||
if (c[1] > 0.0) {
|
||||
e1 += 1;
|
||||
}
|
||||
sample_image_fn_(x, top, c);
|
||||
if (c[1] > 0.0) {
|
||||
e1 += 2;
|
||||
}
|
||||
sample_image_fn_(x - 1, bottom + 1, c);
|
||||
if (c[1] > 0.0) {
|
||||
e2 += 1;
|
||||
}
|
||||
sample_image_fn_(x, bottom + 1, c);
|
||||
if (c[1] > 0.0) {
|
||||
e2 += 2;
|
||||
}
|
||||
|
||||
/* Get the area for this direction: */
|
||||
area(d1, d2, e1, e2, it.out + 2); /* B, A */
|
||||
|
||||
/* Fix corners: */
|
||||
if (corner_rounding_) {
|
||||
detect_vertical_corner_pattern(it.out + 2, x, top, bottom, d1, d2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SMAABlendingWeightCalculationOperation::get_area_of_interest(const int /*input_idx*/,
|
||||
const rcti &output_area,
|
||||
rcti &r_input_area)
|
||||
void SMAAOperation::get_area_of_interest(const int /*input_idx*/,
|
||||
const rcti &output_area,
|
||||
rcti &r_input_area)
|
||||
{
|
||||
r_input_area.xmax = output_area.xmax +
|
||||
fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1);
|
||||
r_input_area.xmin = output_area.xmin -
|
||||
fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG + 1);
|
||||
r_input_area.ymax = output_area.ymax + fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG);
|
||||
r_input_area.ymin = output_area.ymin -
|
||||
fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG);
|
||||
math::max(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1);
|
||||
r_input_area.xmin = output_area.xmin - math::max(math::max(SMAA_MAX_SEARCH_STEPS - 1, 1),
|
||||
SMAA_MAX_SEARCH_STEPS_DIAG + 1);
|
||||
r_input_area.ymax = output_area.ymax +
|
||||
math::max(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG);
|
||||
r_input_area.ymin = output_area.ymin - math::max(math::max(SMAA_MAX_SEARCH_STEPS - 1, 1),
|
||||
SMAA_MAX_SEARCH_STEPS_DIAG);
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Diagonal Search Functions */
|
||||
|
||||
int SMAABlendingWeightCalculationOperation::search_diag1(int x, int y, int dir, bool *r_found)
|
||||
void SMAAOperation::update_memory_buffer(MemoryBuffer *output,
|
||||
const rcti & /*area*/,
|
||||
Span<MemoryBuffer *> inputs)
|
||||
{
|
||||
float e[4];
|
||||
int end = x + SMAA_MAX_SEARCH_STEPS_DIAG * dir;
|
||||
*r_found = false;
|
||||
|
||||
while (x != end) {
|
||||
x += dir;
|
||||
y -= dir;
|
||||
sample_image_fn_(x, y, e);
|
||||
if (e[1] == 0.0f) {
|
||||
*r_found = true;
|
||||
break;
|
||||
}
|
||||
if (e[0] == 0.0f) {
|
||||
*r_found = true;
|
||||
return (dir < 0) ? x : x - dir;
|
||||
}
|
||||
}
|
||||
|
||||
return x - dir;
|
||||
}
|
||||
|
||||
int SMAABlendingWeightCalculationOperation::search_diag2(int x, int y, int dir, bool *r_found)
|
||||
{
|
||||
float e[4];
|
||||
int end = x + SMAA_MAX_SEARCH_STEPS_DIAG * dir;
|
||||
*r_found = false;
|
||||
|
||||
while (x != end) {
|
||||
x += dir;
|
||||
y += dir;
|
||||
sample_image_fn_(x, y, e);
|
||||
if (e[1] == 0.0f) {
|
||||
*r_found = true;
|
||||
break;
|
||||
}
|
||||
sample_image_fn_(x + 1, y, e);
|
||||
if (e[0] == 0.0f) {
|
||||
*r_found = true;
|
||||
return (dir > 0) ? x : x - dir;
|
||||
}
|
||||
}
|
||||
|
||||
return x - dir;
|
||||
}
|
||||
|
||||
void SMAABlendingWeightCalculationOperation::calculate_diag_weights(int x,
|
||||
int y,
|
||||
const float edges[2],
|
||||
float weights[2])
|
||||
{
|
||||
int d1, d2;
|
||||
bool d1_found, d2_found;
|
||||
float e[4], c[4];
|
||||
|
||||
zero_v2(weights);
|
||||
|
||||
if (SMAA_MAX_SEARCH_STEPS_DIAG <= 0) {
|
||||
const MemoryBuffer *image = inputs[0];
|
||||
if (image->is_a_single_elem()) {
|
||||
copy_v4_v4(output->get_elem(0, 0), image->get_elem(0, 0));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Search for the line ends: */
|
||||
if (edges[0] > 0.0f) {
|
||||
d1 = x - search_diag1(x, y, -1, &d1_found);
|
||||
}
|
||||
else {
|
||||
d1 = 0;
|
||||
d1_found = true;
|
||||
}
|
||||
d2 = search_diag1(x, y, 1, &d2_found) - x;
|
||||
const int2 size = int2(image->get_width(), image->get_height());
|
||||
MemoryBuffer edges(DataType::Float2, size.x, size.y);
|
||||
|
||||
if (d1 + d2 > 2) { /* d1 + d2 + 1 > 3 */
|
||||
int e1 = 0, e2 = 0;
|
||||
float3 luminance_coefficients;
|
||||
IMB_colormanagement_get_luminance_coefficients(luminance_coefficients);
|
||||
|
||||
if (d1_found) {
|
||||
/* Fetch the crossing edges: */
|
||||
int left = x - d1, bottom = y + d1;
|
||||
threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
|
||||
for (const int64_t y : sub_y_range) {
|
||||
for (const int64_t x : IndexRange(size.x)) {
|
||||
int2 texel = int2(x, y);
|
||||
float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
|
||||
|
||||
sample_image_fn_(left - 1, bottom, c);
|
||||
if (c[1] > 0.0) {
|
||||
e1 += 2;
|
||||
}
|
||||
sample_image_fn_(left, bottom, c);
|
||||
if (c[0] > 0.0) {
|
||||
e1 += 1;
|
||||
float4 offset[3];
|
||||
SMAAEdgeDetectionVS(coordinates, size, offset);
|
||||
|
||||
float2 edge = SMAALumaEdgeDetectionPS(coordinates,
|
||||
offset,
|
||||
image,
|
||||
threshold_,
|
||||
luminance_coefficients,
|
||||
local_contrast_adaptation_factor_);
|
||||
copy_v2_v2(edges.get_elem(texel.x, texel.y), edge);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (d2_found) {
|
||||
/* Fetch the crossing edges: */
|
||||
int right = x + d2, top = y - d2;
|
||||
MemoryBuffer blending_weights(DataType::Color, size.x, size.y);
|
||||
|
||||
sample_image_fn_(right + 1, top, c);
|
||||
if (c[1] > 0.0) {
|
||||
e2 += 2;
|
||||
}
|
||||
sample_image_fn_(right + 1, top - 1, c);
|
||||
if (c[0] > 0.0) {
|
||||
e2 += 1;
|
||||
MemoryBuffer area_texture(DataType::Float2, AREATEX_WIDTH, AREATEX_HEIGHT);
|
||||
area_texture.copy_from(areaTexBytes, area_texture.get_rect());
|
||||
|
||||
MemoryBuffer search_texture(DataType::Value, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT);
|
||||
search_texture.copy_from(searchTexBytes, search_texture.get_rect());
|
||||
|
||||
threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
|
||||
for (const int64_t y : sub_y_range) {
|
||||
for (const int64_t x : IndexRange(size.x)) {
|
||||
int2 texel = int2(x, y);
|
||||
float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
|
||||
|
||||
float4 offset[3];
|
||||
float2 pixel_coordinates;
|
||||
SMAABlendingWeightCalculationVS(coordinates, size, pixel_coordinates, offset);
|
||||
|
||||
float4 weights = SMAABlendingWeightCalculationPS(coordinates,
|
||||
pixel_coordinates,
|
||||
offset,
|
||||
&edges,
|
||||
&area_texture,
|
||||
&search_texture,
|
||||
float4(0.0f),
|
||||
size,
|
||||
corner_rounding_);
|
||||
copy_v4_v4(blending_weights.get_elem(texel.x, texel.y), weights);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/* Fetch the areas for this line: */
|
||||
area_diag(d1, d2, e1, e2, weights);
|
||||
}
|
||||
threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
|
||||
for (const int64_t y : sub_y_range) {
|
||||
for (const int64_t x : IndexRange(size.x)) {
|
||||
int2 texel = int2(x, y);
|
||||
float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
|
||||
|
||||
/* Search for the line ends: */
|
||||
d1 = x - search_diag2(x, y, -1, &d1_found);
|
||||
sample_image_fn_(x + 1, y, e);
|
||||
if (e[0] > 0.0f) {
|
||||
d2 = search_diag2(x, y, 1, &d2_found) - x;
|
||||
}
|
||||
else {
|
||||
d2 = 0;
|
||||
d2_found = true;
|
||||
}
|
||||
float4 offset;
|
||||
SMAANeighborhoodBlendingVS(coordinates, size, offset);
|
||||
|
||||
if (d1 + d2 > 2) { /* d1 + d2 + 1 > 3 */
|
||||
int e1 = 0, e2 = 0;
|
||||
|
||||
if (d1_found) {
|
||||
/* Fetch the crossing edges: */
|
||||
int left = x - d1, top = y - d1;
|
||||
|
||||
sample_image_fn_(left - 1, top, c);
|
||||
if (c[1] > 0.0) {
|
||||
e1 += 2;
|
||||
}
|
||||
sample_image_fn_(left, top - 1, c);
|
||||
if (c[0] > 0.0) {
|
||||
e1 += 1;
|
||||
float4 result = SMAANeighborhoodBlendingPS(
|
||||
coordinates, offset, image, &blending_weights, size);
|
||||
copy_v4_v4(output->get_elem(texel.x, texel.y), result);
|
||||
}
|
||||
}
|
||||
|
||||
if (d2_found) {
|
||||
/* Fetch the crossing edges: */
|
||||
int right = x + d2, bottom = y + d2;
|
||||
|
||||
sample_image_fn_(right + 1, bottom, c);
|
||||
if (c[1] > 0.0) {
|
||||
e2 += 2;
|
||||
}
|
||||
if (c[0] > 0.0) {
|
||||
e2 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fetch the areas for this line: */
|
||||
float w[2];
|
||||
area_diag(d1, d2, e1, e2, w);
|
||||
weights[0] += w[1];
|
||||
weights[1] += w[0];
|
||||
}
|
||||
}
|
||||
|
||||
bool SMAABlendingWeightCalculationOperation::is_vertical_search_unneeded(int x, int y)
|
||||
{
|
||||
int d1, d2;
|
||||
bool found;
|
||||
float e[4];
|
||||
|
||||
if (SMAA_MAX_SEARCH_STEPS_DIAG <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Search for the line ends: */
|
||||
sample_image_fn_(x - 1, y, e);
|
||||
if (e[1] > 0.0f) {
|
||||
d1 = x - search_diag2(x - 1, y, -1, &found);
|
||||
}
|
||||
else {
|
||||
d1 = 0;
|
||||
}
|
||||
d2 = search_diag2(x - 1, y, 1, &found) - x;
|
||||
|
||||
return (d1 + d2 > 2); /* d1 + d2 + 1 > 3 */
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Horizontal/Vertical Search Functions */
|
||||
|
||||
int SMAABlendingWeightCalculationOperation::search_xleft(int x, int y)
|
||||
{
|
||||
int end = x - SMAA_MAX_SEARCH_STEPS;
|
||||
float e[4];
|
||||
|
||||
while (x > end) {
|
||||
sample_image_fn_(x, y, e);
|
||||
if (e[1] == 0.0f) { /* Is the edge not activated? */
|
||||
break;
|
||||
}
|
||||
if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
|
||||
return x;
|
||||
}
|
||||
sample_image_fn_(x, y - 1, e);
|
||||
if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
|
||||
return x;
|
||||
}
|
||||
x--;
|
||||
}
|
||||
|
||||
return x + 1;
|
||||
}
|
||||
|
||||
int SMAABlendingWeightCalculationOperation::search_xright(int x, int y)
|
||||
{
|
||||
int end = x + SMAA_MAX_SEARCH_STEPS;
|
||||
float e[4];
|
||||
|
||||
while (x < end) {
|
||||
x++;
|
||||
sample_image_fn_(x, y, e);
|
||||
if (e[1] == 0.0f || /* Is the edge not activated? */
|
||||
e[0] != 0.0f) /* Or is there a crossing edge that breaks the line? */
|
||||
{
|
||||
break;
|
||||
}
|
||||
sample_image_fn_(x, y - 1, e);
|
||||
if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return x - 1;
|
||||
}
|
||||
|
||||
int SMAABlendingWeightCalculationOperation::search_yup(int x, int y)
|
||||
{
|
||||
int end = y - SMAA_MAX_SEARCH_STEPS;
|
||||
float e[4];
|
||||
|
||||
while (y > end) {
|
||||
sample_image_fn_(x, y, e);
|
||||
if (e[0] == 0.0f) { /* Is the edge not activated? */
|
||||
break;
|
||||
}
|
||||
if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
|
||||
return y;
|
||||
}
|
||||
sample_image_fn_(x - 1, y, e);
|
||||
if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
|
||||
return y;
|
||||
}
|
||||
y--;
|
||||
}
|
||||
|
||||
return y + 1;
|
||||
}
|
||||
|
||||
int SMAABlendingWeightCalculationOperation::search_ydown(int x, int y)
|
||||
{
|
||||
int end = y + SMAA_MAX_SEARCH_STEPS;
|
||||
float e[4];
|
||||
|
||||
while (y < end) {
|
||||
y++;
|
||||
sample_image_fn_(x, y, e);
|
||||
if (e[0] == 0.0f || /* Is the edge not activated? */
|
||||
e[1] != 0.0f) /* Or is there a crossing edge that breaks the line? */
|
||||
{
|
||||
break;
|
||||
}
|
||||
sample_image_fn_(x - 1, y, e);
|
||||
if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return y - 1;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Corner Detection Functions */
|
||||
|
||||
void SMAABlendingWeightCalculationOperation::detect_horizontal_corner_pattern(
|
||||
float weights[2], int left, int right, int y, int d1, int d2)
|
||||
{
|
||||
float factor[2] = {1.0f, 1.0f};
|
||||
float rounding = corner_rounding_ / 100.0f;
|
||||
float e[4];
|
||||
|
||||
/* Reduce blending for pixels in the center of a line. */
|
||||
rounding *= (d1 == d2) ? 0.5f : 1.0f;
|
||||
|
||||
/* Near the left corner */
|
||||
if (d1 <= d2) {
|
||||
sample_image_fn_(left, y + 1, e);
|
||||
factor[0] -= rounding * e[0];
|
||||
sample_image_fn_(left, y - 2, e);
|
||||
factor[1] -= rounding * e[0];
|
||||
}
|
||||
/* Near the right corner */
|
||||
if (d1 >= d2) {
|
||||
sample_image_fn_(right + 1, y + 1, e);
|
||||
factor[0] -= rounding * e[0];
|
||||
sample_image_fn_(right + 1, y - 2, e);
|
||||
factor[1] -= rounding * e[0];
|
||||
}
|
||||
|
||||
weights[0] *= std::clamp(factor[0], 0.0f, 1.0f);
|
||||
weights[1] *= std::clamp(factor[1], 0.0f, 1.0f);
|
||||
}
|
||||
|
||||
void SMAABlendingWeightCalculationOperation::detect_vertical_corner_pattern(
|
||||
float weights[2], int x, int top, int bottom, int d1, int d2)
|
||||
{
|
||||
float factor[2] = {1.0f, 1.0f};
|
||||
float rounding = corner_rounding_ / 100.0f;
|
||||
float e[4];
|
||||
|
||||
/* Reduce blending for pixels in the center of a line. */
|
||||
rounding *= (d1 == d2) ? 0.5f : 1.0f;
|
||||
|
||||
/* Near the top corner */
|
||||
if (d1 <= d2) {
|
||||
sample_image_fn_(x + 1, top, e);
|
||||
factor[0] -= rounding * e[1];
|
||||
sample_image_fn_(x - 2, top, e);
|
||||
factor[1] -= rounding * e[1];
|
||||
}
|
||||
/* Near the bottom corner */
|
||||
if (d1 >= d2) {
|
||||
sample_image_fn_(x + 1, bottom + 1, e);
|
||||
factor[0] -= rounding * e[1];
|
||||
sample_image_fn_(x - 2, bottom + 1, e);
|
||||
factor[1] -= rounding * e[1];
|
||||
}
|
||||
|
||||
weights[0] *= std::clamp(factor[0], 0.0f, 1.0f);
|
||||
weights[1] *= std::clamp(factor[1], 0.0f, 1.0f);
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Neighborhood Blending (Third Pass) */
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
|
||||
SMAANeighborhoodBlendingOperation::SMAANeighborhoodBlendingOperation()
|
||||
{
|
||||
this->add_input_socket(DataType::Color); /* image */
|
||||
this->add_input_socket(DataType::Color); /* blend */
|
||||
this->add_output_socket(DataType::Color);
|
||||
flags_.can_be_constant = true;
|
||||
}
|
||||
|
||||
void SMAANeighborhoodBlendingOperation::update_memory_buffer_partial(MemoryBuffer *output,
|
||||
const rcti &out_area,
|
||||
Span<MemoryBuffer *> inputs)
|
||||
{
|
||||
MemoryBuffer *image1 = inputs[0];
|
||||
MemoryBuffer *image2 = inputs[1];
|
||||
for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
|
||||
const float x = it.x;
|
||||
const float y = it.y;
|
||||
float w[4];
|
||||
|
||||
/* Fetch the blending weights for current pixel: */
|
||||
image2->read_elem_checked(x, y, w);
|
||||
const float left = w[2], top = w[0];
|
||||
image2->read_elem_checked(x + 1, y, w);
|
||||
const float right = w[3];
|
||||
image2->read_elem_checked(x, y + 1, w);
|
||||
const float bottom = w[1];
|
||||
|
||||
/* Is there any blending weight with a value greater than 0.0? */
|
||||
if (right + bottom + left + top < 1e-5f) {
|
||||
image1->read_elem_checked(x, y, it.out);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Calculate the blending offsets: */
|
||||
void (*sample_fn)(MemoryBuffer *reader, int x, int y, float xoffset, float color[4]);
|
||||
float offset1, offset2, weight1, weight2, color1[4], color2[4];
|
||||
|
||||
if (fmaxf(right, left) > fmaxf(bottom, top)) { /* `max(horizontal) > max(vertical)` */
|
||||
sample_fn = sample_bilinear_horizontal;
|
||||
offset1 = right;
|
||||
offset2 = -left;
|
||||
weight1 = right / (right + left);
|
||||
weight2 = left / (right + left);
|
||||
}
|
||||
else {
|
||||
sample_fn = sample_bilinear_vertical;
|
||||
offset1 = bottom;
|
||||
offset2 = -top;
|
||||
weight1 = bottom / (bottom + top);
|
||||
weight2 = top / (bottom + top);
|
||||
}
|
||||
|
||||
/* We exploit bilinear filtering to mix current pixel with the chosen neighbor: */
|
||||
sample_fn(image1, x, y, offset1, color1);
|
||||
sample_fn(image1, x, y, offset2, color2);
|
||||
|
||||
mul_v4_v4fl(it.out, color1, weight1);
|
||||
madd_v4_v4fl(it.out, color2, weight2);
|
||||
}
|
||||
}
|
||||
|
||||
void SMAANeighborhoodBlendingOperation::get_area_of_interest(const int /*input_idx*/,
|
||||
const rcti &output_area,
|
||||
rcti &r_input_area)
|
||||
{
|
||||
r_input_area = output_area;
|
||||
expand_area_for_sampler(r_input_area, PixelSampler::Bilinear);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace blender::compositor
|
||||
|
||||
@@ -4,89 +4,38 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "COM_MultiThreadedOperation.h"
|
||||
#include "COM_NodeOperation.h"
|
||||
|
||||
namespace blender::compositor {
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Edge Detection (First Pass) */
|
||||
|
||||
class SMAAEdgeDetectionOperation : public MultiThreadedOperation {
|
||||
class SMAAOperation : public NodeOperation {
|
||||
protected:
|
||||
float threshold_;
|
||||
float contrast_limit_;
|
||||
float threshold_ = 0.1f;
|
||||
float local_contrast_adaptation_factor_ = 2.0f;
|
||||
int corner_rounding_ = 25;
|
||||
|
||||
public:
|
||||
SMAAEdgeDetectionOperation();
|
||||
SMAAOperation();
|
||||
|
||||
void set_threshold(float threshold);
|
||||
void set_threshold(float threshold)
|
||||
{
|
||||
threshold_ = threshold;
|
||||
}
|
||||
|
||||
void set_local_contrast_adaptation_factor(float factor);
|
||||
void set_local_contrast_adaptation_factor(float factor)
|
||||
{
|
||||
local_contrast_adaptation_factor_ = factor;
|
||||
}
|
||||
|
||||
void set_corner_rounding(int corner_rounding)
|
||||
{
|
||||
corner_rounding_ = corner_rounding;
|
||||
}
|
||||
|
||||
void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
|
||||
void update_memory_buffer_partial(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs) override;
|
||||
};
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Blending Weight Calculation (Second Pass) */
|
||||
|
||||
class SMAABlendingWeightCalculationOperation : public MultiThreadedOperation {
|
||||
private:
|
||||
std::function<void(int x, int y, float *out)> sample_image_fn_;
|
||||
int corner_rounding_;
|
||||
|
||||
public:
|
||||
SMAABlendingWeightCalculationOperation();
|
||||
|
||||
void set_corner_rounding(float rounding);
|
||||
|
||||
void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
|
||||
void update_memory_buffer_started(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs) override;
|
||||
void update_memory_buffer_partial(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs) override;
|
||||
|
||||
private:
|
||||
/* Diagonal Search Functions */
|
||||
/**
|
||||
* These functions allows to perform diagonal pattern searches.
|
||||
*/
|
||||
int search_diag1(int x, int y, int dir, bool *r_found);
|
||||
int search_diag2(int x, int y, int dir, bool *r_found);
|
||||
/**
|
||||
* This searches for diagonal patterns and returns the corresponding weights.
|
||||
*/
|
||||
void calculate_diag_weights(int x, int y, const float edges[2], float weights[2]);
|
||||
bool is_vertical_search_unneeded(int x, int y);
|
||||
|
||||
/* Horizontal/Vertical Search Functions */
|
||||
int search_xleft(int x, int y);
|
||||
int search_xright(int x, int y);
|
||||
int search_yup(int x, int y);
|
||||
int search_ydown(int x, int y);
|
||||
|
||||
/* Corner Detection Functions */
|
||||
void detect_horizontal_corner_pattern(
|
||||
float weights[2], int left, int right, int y, int d1, int d2);
|
||||
void detect_vertical_corner_pattern(
|
||||
float weights[2], int x, int top, int bottom, int d1, int d2);
|
||||
};
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/* Neighborhood Blending (Third Pass) */
|
||||
|
||||
class SMAANeighborhoodBlendingOperation : public MultiThreadedOperation {
|
||||
public:
|
||||
SMAANeighborhoodBlendingOperation();
|
||||
|
||||
void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
|
||||
void update_memory_buffer_partial(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs) override;
|
||||
void update_memory_buffer(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs) override;
|
||||
};
|
||||
|
||||
} // namespace blender::compositor
|
||||
|
||||
Reference in New Issue
Block a user